macromem.cc revision 10199:6cf40d777682
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include <sstream>
44
45#include "arch/arm/insts/macromem.hh"
46
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57                       OpClass __opClass, IntRegIndex rn,
58                       bool index, bool up, bool user, bool writeback,
59                       bool load, uint32_t reglist) :
60    PredMacroOp(mnem, machInst, __opClass)
61{
62    uint32_t regs = reglist;
63    uint32_t ones = number_of_ones(reglist);
64    // Remember that writeback adds a uop or two and the temp register adds one
65    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;
66
67    // It's technically legal to do a lot of nothing
68    if (!ones)
69        numMicroops = 1;
70
71    microOps = new StaticInstPtr[numMicroops];
72    uint32_t addr = 0;
73
74    if (!up)
75        addr = (ones << 2) - 4;
76
77    if (!index)
78        addr += 4;
79
80    StaticInstPtr *uop = microOps;
81
82    // Add 0 to Rn and stick it in ureg0.
83    // This is equivalent to a move.
84    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
85
86    unsigned reg = 0;
87    unsigned regIdx = 0;
88    bool force_user = user & !bits(reglist, 15);
89    bool exception_ret = user & bits(reglist, 15);
90
91    for (int i = 0; i < ones; i++) {
92        // Find the next register.
93        while (!bits(regs, reg))
94            reg++;
95        replaceBits(regs, reg, 0);
96
97        regIdx = reg;
98        if (force_user) {
99            regIdx = intRegInMode(MODE_USER, regIdx);
100        }
101
102        if (load) {
103            if (writeback && i == ones - 1) {
104                // If it's a writeback and this is the last register
105                // do the load into a temporary register which we'll move
106                // into the final one later
107                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
108                        up, addr);
109            } else {
110                // Otherwise just do it normally
111                if (reg == INTREG_PC && exception_ret) {
112                    // This must be the exception return form of ldm.
113                    *++uop = new MicroLdrRetUop(machInst, regIdx,
114                                               INTREG_UREG0, up, addr);
115                    if (!(condCode == COND_AL || condCode == COND_UC))
116                        (*uop)->setFlag(StaticInst::IsCondControl);
117                    else
118                        (*uop)->setFlag(StaticInst::IsUncondControl);
119                } else {
120                    *++uop = new MicroLdrUop(machInst, regIdx,
121                                            INTREG_UREG0, up, addr);
122                    if (reg == INTREG_PC) {
123                        (*uop)->setFlag(StaticInst::IsControl);
124                        if (!(condCode == COND_AL || condCode == COND_UC))
125                            (*uop)->setFlag(StaticInst::IsCondControl);
126                        else
127                            (*uop)->setFlag(StaticInst::IsUncondControl);
128                        (*uop)->setFlag(StaticInst::IsIndirectControl);
129                    }
130                }
131            }
132        } else {
133            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
134        }
135
136        if (up)
137            addr += 4;
138        else
139            addr -= 4;
140    }
141
142    if (writeback && ones) {
143        // put the register update after we're done all loading
144        if (up)
145            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
146        else
147            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);
148
149        // If this was a load move the last temporary value into place
150        // this way we can't take an exception after we update the base
151        // register.
152        if (load && reg == INTREG_PC && exception_ret) {
153            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
154            if (!(condCode == COND_AL || condCode == COND_UC))
155                (*uop)->setFlag(StaticInst::IsCondControl);
156            else
157                (*uop)->setFlag(StaticInst::IsUncondControl);
158        } else if (load) {
159            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
160            if (reg == INTREG_PC) {
161                (*uop)->setFlag(StaticInst::IsControl);
162                (*uop)->setFlag(StaticInst::IsCondControl);
163                (*uop)->setFlag(StaticInst::IsIndirectControl);
164                // This is created as a RAS POP
165                if (rn == INTREG_SP)
166                    (*uop)->setFlag(StaticInst::IsReturn);
167
168            }
169        }
170    }
171
172    (*uop)->setLastMicroop();
173
174    /* Take the control flags from the last microop for the macroop */
175    if ((*uop)->isControl())
176        setFlag(StaticInst::IsControl);
177    if ((*uop)->isCondCtrl())
178        setFlag(StaticInst::IsCondControl);
179    if ((*uop)->isIndirectCtrl())
180        setFlag(StaticInst::IsIndirectControl);
181    if ((*uop)->isReturn())
182        setFlag(StaticInst::IsReturn);
183
184    for (StaticInstPtr *curUop = microOps;
185            !(*curUop)->isLastMicroop(); curUop++) {
186        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
187        assert(uopPtr);
188        uopPtr->setDelayedCommit();
189    }
190}
191
192PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
193                     uint32_t size, bool fp, bool load, bool noAlloc,
194                     bool signExt, bool exclusive, bool acrel,
195                     int64_t imm, AddrMode mode,
196                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
197    PredMacroOp(mnem, machInst, __opClass)
198{
199    bool writeback = (mode != AddrMd_Offset);
200    numMicroops = 1 + (size / 4) + (writeback ? 1 : 0);
201    microOps = new StaticInstPtr[numMicroops];
202
203    StaticInstPtr *uop = microOps;
204
205    bool post = (mode == AddrMd_PostIndex);
206
207    rn = makeSP(rn);
208
209    *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm);
210
211    if (fp) {
212        if (size == 16) {
213            if (load) {
214                *++uop = new MicroLdrQBFpXImmUop(machInst, rt,
215                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
216                *++uop = new MicroLdrQTFpXImmUop(machInst, rt,
217                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
218                *++uop = new MicroLdrQBFpXImmUop(machInst, rt2,
219                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
220                *++uop = new MicroLdrQTFpXImmUop(machInst, rt2,
221                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
222            } else {
223                *++uop = new MicroStrQBFpXImmUop(machInst, rt,
224                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
225                *++uop = new MicroStrQTFpXImmUop(machInst, rt,
226                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
227                *++uop = new MicroStrQBFpXImmUop(machInst, rt2,
228                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
229                *++uop = new MicroStrQTFpXImmUop(machInst, rt2,
230                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
231            }
232        } else if (size == 8) {
233            if (load) {
234                *++uop = new MicroLdrFpXImmUop(machInst, rt,
235                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
236                *++uop = new MicroLdrFpXImmUop(machInst, rt2,
237                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
238            } else {
239                *++uop = new MicroStrFpXImmUop(machInst, rt,
240                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
241                *++uop = new MicroStrFpXImmUop(machInst, rt2,
242                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
243            }
244        } else if (size == 4) {
245            if (load) {
246                *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2,
247                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
248            } else {
249                *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2,
250                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
251            }
252        }
253    } else {
254        if (size == 8) {
255            if (load) {
256                *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0,
257                        0, noAlloc, exclusive, acrel);
258                *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0,
259                        size, noAlloc, exclusive, acrel);
260            } else {
261                *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0,
262                        0, noAlloc, exclusive, acrel);
263                *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0,
264                        size, noAlloc, exclusive, acrel);
265            }
266        } else if (size == 4) {
267            if (load) {
268                if (signExt) {
269                    *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2,
270                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
271                } else {
272                    *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2,
273                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
274                }
275            } else {
276                *++uop = new MicroStrDXImmUop(machInst, rt, rt2,
277                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
278            }
279        }
280    }
281
282    if (writeback) {
283        *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0,
284                                   post ? imm : 0);
285    }
286
287    (*uop)->setLastMicroop();
288
289    for (StaticInstPtr *curUop = microOps;
290            !(*curUop)->isLastMicroop(); curUop++) {
291        (*curUop)->setDelayedCommit();
292    }
293}
294
295BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
296                             OpClass __opClass, bool load, IntRegIndex dest,
297                             IntRegIndex base, int64_t imm) :
298    PredMacroOp(mnem, machInst, __opClass)
299{
300    numMicroops = 2;
301    microOps = new StaticInstPtr[numMicroops];
302
303    if (load) {
304        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
305        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
306    } else {
307        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
308        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
309    }
310    microOps[0]->setDelayedCommit();
311    microOps[1]->setLastMicroop();
312}
313
314BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
315                               OpClass __opClass, bool load, IntRegIndex dest,
316                               IntRegIndex base, int64_t imm) :
317    PredMacroOp(mnem, machInst, __opClass)
318{
319    numMicroops = 3;
320    microOps = new StaticInstPtr[numMicroops];
321
322    if (load) {
323        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0);
324        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0);
325    } else {
326        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
327        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
328    }
329    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
330
331    microOps[0]->setDelayedCommit();
332    microOps[1]->setDelayedCommit();
333    microOps[2]->setLastMicroop();
334}
335
336BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
337                             OpClass __opClass, bool load, IntRegIndex dest,
338                             IntRegIndex base, int64_t imm) :
339    PredMacroOp(mnem, machInst, __opClass)
340{
341    numMicroops = 3;
342    microOps = new StaticInstPtr[numMicroops];
343
344    if (load) {
345        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
346        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
347    } else {
348        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
349        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
350    }
351    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
352
353    microOps[0]->setDelayedCommit();
354    microOps[1]->setDelayedCommit();
355    microOps[2]->setLastMicroop();
356}
357
358BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
359                             OpClass __opClass, bool load, IntRegIndex dest,
360                             IntRegIndex base, IntRegIndex offset,
361                             ArmExtendType type, int64_t imm) :
362    PredMacroOp(mnem, machInst, __opClass)
363{
364    numMicroops = 2;
365    microOps = new StaticInstPtr[numMicroops];
366
367    if (load) {
368        microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base,
369                                              offset, type, imm);
370        microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base,
371                                              offset, type, imm);
372    } else {
373        microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base,
374                                              offset, type, imm);
375        microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base,
376                                              offset, type, imm);
377    }
378
379    microOps[0]->setDelayedCommit();
380    microOps[1]->setLastMicroop();
381}
382
383BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
384                             OpClass __opClass, IntRegIndex dest,
385                             int64_t imm) :
386    PredMacroOp(mnem, machInst, __opClass)
387{
388    numMicroops = 2;
389    microOps = new StaticInstPtr[numMicroops];
390
391    microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm);
392    microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm);
393
394    microOps[0]->setDelayedCommit();
395    microOps[1]->setLastMicroop();
396}
397
398VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
399                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
400                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
401    PredMacroOp(mnem, machInst, __opClass)
402{
403    assert(regs > 0 && regs <= 4);
404    assert(regs % elems == 0);
405
406    numMicroops = (regs > 2) ? 2 : 1;
407    bool wb = (rm != 15);
408    bool deinterleave = (elems > 1);
409
410    if (wb) numMicroops++;
411    if (deinterleave) numMicroops += (regs / elems);
412    microOps = new StaticInstPtr[numMicroops];
413
414    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
415
416    uint32_t noAlign = TLB::MustBeOne;
417
418    unsigned uopIdx = 0;
419    switch (regs) {
420      case 4:
421        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
422                size, machInst, rMid, rn, 0, align);
423        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
424                size, machInst, rMid + 4, rn, 16, noAlign);
425        break;
426      case 3:
427        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
428                size, machInst, rMid, rn, 0, align);
429        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
430                size, machInst, rMid + 4, rn, 16, noAlign);
431        break;
432      case 2:
433        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
434                size, machInst, rMid, rn, 0, align);
435        break;
436      case 1:
437        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
438                size, machInst, rMid, rn, 0, align);
439        break;
440      default:
441        // Unknown number of registers
442        microOps[uopIdx++] = new Unknown(machInst);
443    }
444    if (wb) {
445        if (rm != 15 && rm != 13) {
446            microOps[uopIdx++] =
447                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
448        } else {
449            microOps[uopIdx++] =
450                new MicroAddiUop(machInst, rn, rn, regs * 8);
451        }
452    }
453    if (deinterleave) {
454        switch (elems) {
455          case 4:
456            assert(regs == 4);
457            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
458                    size, machInst, vd * 2, rMid, inc * 2);
459            break;
460          case 3:
461            assert(regs == 3);
462            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
463                    size, machInst, vd * 2, rMid, inc * 2);
464            break;
465          case 2:
466            assert(regs == 4 || regs == 2);
467            if (regs == 4) {
468                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
469                        size, machInst, vd * 2, rMid, inc * 2);
470                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
471                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
472            } else {
473                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
474                        size, machInst, vd * 2, rMid, inc * 2);
475            }
476            break;
477          default:
478            // Bad number of elements to deinterleave
479            microOps[uopIdx++] = new Unknown(machInst);
480        }
481    }
482    assert(uopIdx == numMicroops);
483
484    for (unsigned i = 0; i < numMicroops - 1; i++) {
485        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
486        assert(uopPtr);
487        uopPtr->setDelayedCommit();
488    }
489    microOps[numMicroops - 1]->setLastMicroop();
490}
491
492VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
493                         OpClass __opClass, bool all, unsigned elems,
494                         RegIndex rn, RegIndex vd, unsigned regs,
495                         unsigned inc, uint32_t size, uint32_t align,
496                         RegIndex rm, unsigned lane) :
497    PredMacroOp(mnem, machInst, __opClass)
498{
499    assert(regs > 0 && regs <= 4);
500    assert(regs % elems == 0);
501
502    unsigned eBytes = (1 << size);
503    unsigned loadSize = eBytes * elems;
504    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
505                        sizeof(FloatRegBits);
506
507    assert(loadRegs > 0 && loadRegs <= 4);
508
509    numMicroops = 1;
510    bool wb = (rm != 15);
511
512    if (wb) numMicroops++;
513    numMicroops += (regs / elems);
514    microOps = new StaticInstPtr[numMicroops];
515
516    RegIndex ufp0 = NumFloatV7ArchRegs;
517
518    unsigned uopIdx = 0;
519    switch (loadSize) {
520      case 1:
521        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
522                machInst, ufp0, rn, 0, align);
523        break;
524      case 2:
525        if (eBytes == 2) {
526            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
527                    machInst, ufp0, rn, 0, align);
528        } else {
529            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
530                    machInst, ufp0, rn, 0, align);
531        }
532        break;
533      case 3:
534        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
535                machInst, ufp0, rn, 0, align);
536        break;
537      case 4:
538        switch (eBytes) {
539          case 1:
540            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
541                    machInst, ufp0, rn, 0, align);
542            break;
543          case 2:
544            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
545                    machInst, ufp0, rn, 0, align);
546            break;
547          case 4:
548            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
549                    machInst, ufp0, rn, 0, align);
550            break;
551        }
552        break;
553      case 6:
554        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
555                machInst, ufp0, rn, 0, align);
556        break;
557      case 8:
558        switch (eBytes) {
559          case 2:
560            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
561                    machInst, ufp0, rn, 0, align);
562            break;
563          case 4:
564            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
565                    machInst, ufp0, rn, 0, align);
566            break;
567        }
568        break;
569      case 12:
570        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
571                machInst, ufp0, rn, 0, align);
572        break;
573      case 16:
574        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
575                machInst, ufp0, rn, 0, align);
576        break;
577      default:
578        // Unrecognized load size
579        microOps[uopIdx++] = new Unknown(machInst);
580    }
581    if (wb) {
582        if (rm != 15 && rm != 13) {
583            microOps[uopIdx++] =
584                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
585        } else {
586            microOps[uopIdx++] =
587                new MicroAddiUop(machInst, rn, rn, loadSize);
588        }
589    }
590    switch (elems) {
591      case 4:
592        assert(regs == 4);
593        switch (size) {
594          case 0:
595            if (all) {
596                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
597                        machInst, vd * 2, ufp0, inc * 2);
598            } else {
599                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
600                        machInst, vd * 2, ufp0, inc * 2, lane);
601            }
602            break;
603          case 1:
604            if (all) {
605                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
606                        machInst, vd * 2, ufp0, inc * 2);
607            } else {
608                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
609                        machInst, vd * 2, ufp0, inc * 2, lane);
610            }
611            break;
612          case 2:
613            if (all) {
614                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
615                        machInst, vd * 2, ufp0, inc * 2);
616            } else {
617                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
618                        machInst, vd * 2, ufp0, inc * 2, lane);
619            }
620            break;
621          default:
622            // Bad size
623            microOps[uopIdx++] = new Unknown(machInst);
624            break;
625        }
626        break;
627      case 3:
628        assert(regs == 3);
629        switch (size) {
630          case 0:
631            if (all) {
632                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
633                        machInst, vd * 2, ufp0, inc * 2);
634            } else {
635                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
636                        machInst, vd * 2, ufp0, inc * 2, lane);
637            }
638            break;
639          case 1:
640            if (all) {
641                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
642                        machInst, vd * 2, ufp0, inc * 2);
643            } else {
644                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
645                        machInst, vd * 2, ufp0, inc * 2, lane);
646            }
647            break;
648          case 2:
649            if (all) {
650                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
651                        machInst, vd * 2, ufp0, inc * 2);
652            } else {
653                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
654                        machInst, vd * 2, ufp0, inc * 2, lane);
655            }
656            break;
657          default:
658            // Bad size
659            microOps[uopIdx++] = new Unknown(machInst);
660            break;
661        }
662        break;
663      case 2:
664        assert(regs == 2);
665        assert(loadRegs <= 2);
666        switch (size) {
667          case 0:
668            if (all) {
669                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
670                        machInst, vd * 2, ufp0, inc * 2);
671            } else {
672                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
673                        machInst, vd * 2, ufp0, inc * 2, lane);
674            }
675            break;
676          case 1:
677            if (all) {
678                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
679                        machInst, vd * 2, ufp0, inc * 2);
680            } else {
681                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
682                        machInst, vd * 2, ufp0, inc * 2, lane);
683            }
684            break;
685          case 2:
686            if (all) {
687                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
688                        machInst, vd * 2, ufp0, inc * 2);
689            } else {
690                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
691                        machInst, vd * 2, ufp0, inc * 2, lane);
692            }
693            break;
694          default:
695            // Bad size
696            microOps[uopIdx++] = new Unknown(machInst);
697            break;
698        }
699        break;
700      case 1:
701        assert(regs == 1 || (all && regs == 2));
702        assert(loadRegs <= 2);
703        for (unsigned offset = 0; offset < regs; offset++) {
704            switch (size) {
705              case 0:
706                if (all) {
707                    microOps[uopIdx++] =
708                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
709                            machInst, (vd + offset) * 2, ufp0, inc * 2);
710                } else {
711                    microOps[uopIdx++] =
712                        new MicroUnpackNeon2to2Uop<uint8_t>(
713                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
714                }
715                break;
716              case 1:
717                if (all) {
718                    microOps[uopIdx++] =
719                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
720                            machInst, (vd + offset) * 2, ufp0, inc * 2);
721                } else {
722                    microOps[uopIdx++] =
723                        new MicroUnpackNeon2to2Uop<uint16_t>(
724                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
725                }
726                break;
727              case 2:
728                if (all) {
729                    microOps[uopIdx++] =
730                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
731                            machInst, (vd + offset) * 2, ufp0, inc * 2);
732                } else {
733                    microOps[uopIdx++] =
734                        new MicroUnpackNeon2to2Uop<uint32_t>(
735                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
736                }
737                break;
738              default:
739                // Bad size
740                microOps[uopIdx++] = new Unknown(machInst);
741                break;
742            }
743        }
744        break;
745      default:
746        // Bad number of elements to unpack
747        microOps[uopIdx++] = new Unknown(machInst);
748    }
749    assert(uopIdx == numMicroops);
750
751    for (unsigned i = 0; i < numMicroops - 1; i++) {
752        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
753        assert(uopPtr);
754        uopPtr->setDelayedCommit();
755    }
756    microOps[numMicroops - 1]->setLastMicroop();
757}
758
759VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
760                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
761                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
762    PredMacroOp(mnem, machInst, __opClass)
763{
764    assert(regs > 0 && regs <= 4);
765    assert(regs % elems == 0);
766
767    numMicroops = (regs > 2) ? 2 : 1;
768    bool wb = (rm != 15);
769    bool interleave = (elems > 1);
770
771    if (wb) numMicroops++;
772    if (interleave) numMicroops += (regs / elems);
773    microOps = new StaticInstPtr[numMicroops];
774
775    uint32_t noAlign = TLB::MustBeOne;
776
777    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
778
779    unsigned uopIdx = 0;
780    if (interleave) {
781        switch (elems) {
782          case 4:
783            assert(regs == 4);
784            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
785                    size, machInst, rMid, vd * 2, inc * 2);
786            break;
787          case 3:
788            assert(regs == 3);
789            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
790                    size, machInst, rMid, vd * 2, inc * 2);
791            break;
792          case 2:
793            assert(regs == 4 || regs == 2);
794            if (regs == 4) {
795                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
796                        size, machInst, rMid, vd * 2, inc * 2);
797                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
798                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
799            } else {
800                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
801                        size, machInst, rMid, vd * 2, inc * 2);
802            }
803            break;
804          default:
805            // Bad number of elements to interleave
806            microOps[uopIdx++] = new Unknown(machInst);
807        }
808    }
809    switch (regs) {
810      case 4:
811        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
812                size, machInst, rMid, rn, 0, align);
813        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
814                size, machInst, rMid + 4, rn, 16, noAlign);
815        break;
816      case 3:
817        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
818                size, machInst, rMid, rn, 0, align);
819        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
820                size, machInst, rMid + 4, rn, 16, noAlign);
821        break;
822      case 2:
823        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
824                size, machInst, rMid, rn, 0, align);
825        break;
826      case 1:
827        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
828                size, machInst, rMid, rn, 0, align);
829        break;
830      default:
831        // Unknown number of registers
832        microOps[uopIdx++] = new Unknown(machInst);
833    }
834    if (wb) {
835        if (rm != 15 && rm != 13) {
836            microOps[uopIdx++] =
837                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
838        } else {
839            microOps[uopIdx++] =
840                new MicroAddiUop(machInst, rn, rn, regs * 8);
841        }
842    }
843    assert(uopIdx == numMicroops);
844
845    for (unsigned i = 0; i < numMicroops - 1; i++) {
846        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
847        assert(uopPtr);
848        uopPtr->setDelayedCommit();
849    }
850    microOps[numMicroops - 1]->setLastMicroop();
851}
852
853VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
854                         OpClass __opClass, bool all, unsigned elems,
855                         RegIndex rn, RegIndex vd, unsigned regs,
856                         unsigned inc, uint32_t size, uint32_t align,
857                         RegIndex rm, unsigned lane) :
858    PredMacroOp(mnem, machInst, __opClass)
859{
860    assert(!all);
861    assert(regs > 0 && regs <= 4);
862    assert(regs % elems == 0);
863
864    unsigned eBytes = (1 << size);
865    unsigned storeSize = eBytes * elems;
866    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
867                         sizeof(FloatRegBits);
868
869    assert(storeRegs > 0 && storeRegs <= 4);
870
871    numMicroops = 1;
872    bool wb = (rm != 15);
873
874    if (wb) numMicroops++;
875    numMicroops += (regs / elems);
876    microOps = new StaticInstPtr[numMicroops];
877
878    RegIndex ufp0 = NumFloatV7ArchRegs;
879
880    unsigned uopIdx = 0;
881    switch (elems) {
882      case 4:
883        assert(regs == 4);
884        switch (size) {
885          case 0:
886            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
887                    machInst, ufp0, vd * 2, inc * 2, lane);
888            break;
889          case 1:
890            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
891                    machInst, ufp0, vd * 2, inc * 2, lane);
892            break;
893          case 2:
894            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
895                    machInst, ufp0, vd * 2, inc * 2, lane);
896            break;
897          default:
898            // Bad size
899            microOps[uopIdx++] = new Unknown(machInst);
900            break;
901        }
902        break;
903      case 3:
904        assert(regs == 3);
905        switch (size) {
906          case 0:
907            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
908                    machInst, ufp0, vd * 2, inc * 2, lane);
909            break;
910          case 1:
911            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
912                    machInst, ufp0, vd * 2, inc * 2, lane);
913            break;
914          case 2:
915            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
916                    machInst, ufp0, vd * 2, inc * 2, lane);
917            break;
918          default:
919            // Bad size
920            microOps[uopIdx++] = new Unknown(machInst);
921            break;
922        }
923        break;
924      case 2:
925        assert(regs == 2);
926        assert(storeRegs <= 2);
927        switch (size) {
928          case 0:
929            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
930                    machInst, ufp0, vd * 2, inc * 2, lane);
931            break;
932          case 1:
933            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
934                    machInst, ufp0, vd * 2, inc * 2, lane);
935            break;
936          case 2:
937            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
938                    machInst, ufp0, vd * 2, inc * 2, lane);
939            break;
940          default:
941            // Bad size
942            microOps[uopIdx++] = new Unknown(machInst);
943            break;
944        }
945        break;
946      case 1:
947        assert(regs == 1 || (all && regs == 2));
948        assert(storeRegs <= 2);
949        for (unsigned offset = 0; offset < regs; offset++) {
950            switch (size) {
951              case 0:
952                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
953                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
954                break;
955              case 1:
956                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
957                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
958                break;
959              case 2:
960                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
961                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
962                break;
963              default:
964                // Bad size
965                microOps[uopIdx++] = new Unknown(machInst);
966                break;
967            }
968        }
969        break;
970      default:
971        // Bad number of elements to unpack
972        microOps[uopIdx++] = new Unknown(machInst);
973    }
974    switch (storeSize) {
975      case 1:
976        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
977                machInst, ufp0, rn, 0, align);
978        break;
979      case 2:
980        if (eBytes == 2) {
981            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
982                    machInst, ufp0, rn, 0, align);
983        } else {
984            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
985                    machInst, ufp0, rn, 0, align);
986        }
987        break;
988      case 3:
989        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
990                machInst, ufp0, rn, 0, align);
991        break;
992      case 4:
993        switch (eBytes) {
994          case 1:
995            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
996                    machInst, ufp0, rn, 0, align);
997            break;
998          case 2:
999            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1000                    machInst, ufp0, rn, 0, align);
1001            break;
1002          case 4:
1003            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1004                    machInst, ufp0, rn, 0, align);
1005            break;
1006        }
1007        break;
1008      case 6:
1009        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1010                machInst, ufp0, rn, 0, align);
1011        break;
1012      case 8:
1013        switch (eBytes) {
1014          case 2:
1015            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1016                    machInst, ufp0, rn, 0, align);
1017            break;
1018          case 4:
1019            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1020                    machInst, ufp0, rn, 0, align);
1021            break;
1022        }
1023        break;
1024      case 12:
1025        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1026                machInst, ufp0, rn, 0, align);
1027        break;
1028      case 16:
1029        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1030                machInst, ufp0, rn, 0, align);
1031        break;
1032      default:
1033        // Bad store size
1034        microOps[uopIdx++] = new Unknown(machInst);
1035    }
1036    if (wb) {
1037        if (rm != 15 && rm != 13) {
1038            microOps[uopIdx++] =
1039                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1040        } else {
1041            microOps[uopIdx++] =
1042                new MicroAddiUop(machInst, rn, rn, storeSize);
1043        }
1044    }
1045    assert(uopIdx == numMicroops);
1046
1047    for (unsigned i = 0; i < numMicroops - 1; i++) {
1048        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1049        assert(uopPtr);
1050        uopPtr->setDelayedCommit();
1051    }
1052    microOps[numMicroops - 1]->setLastMicroop();
1053}
1054
1055VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1056                         OpClass __opClass, RegIndex rn, RegIndex vd,
1057                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1058                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1059    PredMacroOp(mnem, machInst, __opClass)
1060{
1061    RegIndex vx = NumFloatV8ArchRegs / 4;
1062    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1063    bool baseIsSP = isSP((IntRegIndex) rnsp);
1064
1065    numMicroops = wb ? 1 : 0;
1066
1067    int totNumBytes = numRegs * dataSize / 8;
1068    assert(totNumBytes <= 64);
1069
1070    // The guiding principle here is that no more than 16 bytes can be
1071    // transferred at a time
1072    int numMemMicroops = totNumBytes / 16;
1073    int residuum = totNumBytes % 16;
1074    if (residuum)
1075        ++numMemMicroops;
1076    numMicroops += numMemMicroops;
1077
1078    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1079    numMicroops += numMarshalMicroops;
1080
1081    microOps = new StaticInstPtr[numMicroops];
1082    unsigned uopIdx = 0;
1083    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1084        TLB::AllowUnaligned;
1085
1086    int i = 0;
1087    for(; i < numMemMicroops - 1; ++i) {
1088        microOps[uopIdx++] = new MicroNeonLoad64(
1089            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1090            baseIsSP, 16 /* accSize */, eSize);
1091    }
1092    microOps[uopIdx++] =  new MicroNeonLoad64(
1093        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1094        residuum ? residuum : 16 /* accSize */, eSize);
1095
1096    // Writeback microop: the post-increment amount is encoded in "Rm": a
1097    // 64-bit general register OR as '11111' for an immediate value equal to
1098    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1099    if (wb) {
1100        if (rm != ((RegIndex) INTREG_X31)) {
1101            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1102                                                      UXTX, 0);
1103        } else {
1104            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1105                                                   totNumBytes);
1106        }
1107    }
1108
1109    for (int i = 0; i < numMarshalMicroops; ++i) {
1110        microOps[uopIdx++] = new MicroDeintNeon64(
1111            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1112            numStructElems, numRegs, i /* step */);
1113    }
1114
1115    assert(uopIdx == numMicroops);
1116
1117    for (int i = 0; i < numMicroops - 1; ++i) {
1118        microOps[i]->setDelayedCommit();
1119    }
1120    microOps[numMicroops - 1]->setLastMicroop();
1121}
1122
1123VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1124                         OpClass __opClass, RegIndex rn, RegIndex vd,
1125                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1126                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1127    PredMacroOp(mnem, machInst, __opClass)
1128{
1129    RegIndex vx = NumFloatV8ArchRegs / 4;
1130    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1131    bool baseIsSP = isSP((IntRegIndex) rnsp);
1132
1133    numMicroops = wb ? 1 : 0;
1134
1135    int totNumBytes = numRegs * dataSize / 8;
1136    assert(totNumBytes <= 64);
1137
1138    // The guiding principle here is that no more than 16 bytes can be
1139    // transferred at a time
1140    int numMemMicroops = totNumBytes / 16;
1141    int residuum = totNumBytes % 16;
1142    if (residuum)
1143        ++numMemMicroops;
1144    numMicroops += numMemMicroops;
1145
1146    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1147    numMicroops += numMarshalMicroops;
1148
1149    microOps = new StaticInstPtr[numMicroops];
1150    unsigned uopIdx = 0;
1151
1152    for(int i = 0; i < numMarshalMicroops; ++i) {
1153        microOps[uopIdx++] = new MicroIntNeon64(
1154            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1155            numStructElems, numRegs, i /* step */);
1156    }
1157
1158    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1159        TLB::AllowUnaligned;
1160
1161    int i = 0;
1162    for(; i < numMemMicroops - 1; ++i) {
1163        microOps[uopIdx++] = new MicroNeonStore64(
1164            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1165            baseIsSP, 16 /* accSize */, eSize);
1166    }
1167    microOps[uopIdx++] = new MicroNeonStore64(
1168        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1169        residuum ? residuum : 16 /* accSize */, eSize);
1170
1171    // Writeback microop: the post-increment amount is encoded in "Rm": a
1172    // 64-bit general register OR as '11111' for an immediate value equal to
1173    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1174    if (wb) {
1175        if (rm != ((RegIndex) INTREG_X31)) {
1176            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1177                                                      UXTX, 0);
1178        } else {
1179            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1180                                                   totNumBytes);
1181        }
1182    }
1183
1184    assert(uopIdx == numMicroops);
1185
1186    for (int i = 0; i < numMicroops - 1; i++) {
1187        microOps[i]->setDelayedCommit();
1188    }
1189    microOps[numMicroops - 1]->setLastMicroop();
1190}
1191
1192VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1193                             OpClass __opClass, RegIndex rn, RegIndex vd,
1194                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1195                             uint8_t numStructElems, uint8_t index, bool wb,
1196                             bool replicate) :
1197    PredMacroOp(mnem, machInst, __opClass)
1198{
1199    RegIndex vx = NumFloatV8ArchRegs / 4;
1200    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1201    bool baseIsSP = isSP((IntRegIndex) rnsp);
1202
1203    numMicroops = wb ? 1 : 0;
1204
1205    int eSizeBytes = 1 << eSize;
1206    int totNumBytes = numStructElems * eSizeBytes;
1207    assert(totNumBytes <= 64);
1208
1209    // The guiding principle here is that no more than 16 bytes can be
1210    // transferred at a time
1211    int numMemMicroops = totNumBytes / 16;
1212    int residuum = totNumBytes % 16;
1213    if (residuum)
1214        ++numMemMicroops;
1215    numMicroops += numMemMicroops;
1216
1217    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1218    numMicroops += numMarshalMicroops;
1219
1220    microOps = new StaticInstPtr[numMicroops];
1221    unsigned uopIdx = 0;
1222
1223    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1224        TLB::AllowUnaligned;
1225
1226    int i = 0;
1227    for (; i < numMemMicroops - 1; ++i) {
1228        microOps[uopIdx++] = new MicroNeonLoad64(
1229            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1230            baseIsSP, 16 /* accSize */, eSize);
1231    }
1232    microOps[uopIdx++] = new MicroNeonLoad64(
1233        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1234        residuum ? residuum : 16 /* accSize */, eSize);
1235
1236    // Writeback microop: the post-increment amount is encoded in "Rm": a
1237    // 64-bit general register OR as '11111' for an immediate value equal to
1238    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1239    if (wb) {
1240        if (rm != ((RegIndex) INTREG_X31)) {
1241            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1242                                                      UXTX, 0);
1243        } else {
1244            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1245                                                   totNumBytes);
1246        }
1247    }
1248
1249    for(int i = 0; i < numMarshalMicroops; ++i) {
1250        microOps[uopIdx++] = new MicroUnpackNeon64(
1251            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1252            numStructElems, index, i /* step */, replicate);
1253    }
1254
1255    assert(uopIdx == numMicroops);
1256
1257    for (int i = 0; i < numMicroops - 1; i++) {
1258        microOps[i]->setDelayedCommit();
1259    }
1260    microOps[numMicroops - 1]->setLastMicroop();
1261}
1262
1263VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1264                             OpClass __opClass, RegIndex rn, RegIndex vd,
1265                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1266                             uint8_t numStructElems, uint8_t index, bool wb,
1267                             bool replicate) :
1268    PredMacroOp(mnem, machInst, __opClass)
1269{
1270    RegIndex vx = NumFloatV8ArchRegs / 4;
1271    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1272    bool baseIsSP = isSP((IntRegIndex) rnsp);
1273
1274    numMicroops = wb ? 1 : 0;
1275
1276    int eSizeBytes = 1 << eSize;
1277    int totNumBytes = numStructElems * eSizeBytes;
1278    assert(totNumBytes <= 64);
1279
1280    // The guiding principle here is that no more than 16 bytes can be
1281    // transferred at a time
1282    int numMemMicroops = totNumBytes / 16;
1283    int residuum = totNumBytes % 16;
1284    if (residuum)
1285        ++numMemMicroops;
1286    numMicroops += numMemMicroops;
1287
1288    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1289    numMicroops += numMarshalMicroops;
1290
1291    microOps = new StaticInstPtr[numMicroops];
1292    unsigned uopIdx = 0;
1293
1294    for(int i = 0; i < numMarshalMicroops; ++i) {
1295        microOps[uopIdx++] = new MicroPackNeon64(
1296            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1297            numStructElems, index, i /* step */, replicate);
1298    }
1299
1300    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1301        TLB::AllowUnaligned;
1302
1303    int i = 0;
1304    for(; i < numMemMicroops - 1; ++i) {
1305        microOps[uopIdx++] = new MicroNeonStore64(
1306            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1307            baseIsSP, 16 /* accsize */, eSize);
1308    }
1309    microOps[uopIdx++] = new MicroNeonStore64(
1310        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1311        residuum ? residuum : 16 /* accSize */, eSize);
1312
1313    // Writeback microop: the post-increment amount is encoded in "Rm": a
1314    // 64-bit general register OR as '11111' for an immediate value equal to
1315    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1316    if (wb) {
1317        if (rm != ((RegIndex) INTREG_X31)) {
1318            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1319                                                      UXTX, 0);
1320        } else {
1321            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1322                                                   totNumBytes);
1323        }
1324    }
1325
1326    assert(uopIdx == numMicroops);
1327
1328    for (int i = 0; i < numMicroops - 1; i++) {
1329        microOps[i]->setDelayedCommit();
1330    }
1331    microOps[numMicroops - 1]->setLastMicroop();
1332}
1333
1334MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1335                             OpClass __opClass, IntRegIndex rn,
1336                             RegIndex vd, bool single, bool up,
1337                             bool writeback, bool load, uint32_t offset) :
1338    PredMacroOp(mnem, machInst, __opClass)
1339{
1340    int i = 0;
1341
1342    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1343    // to be functionally identical except that fldmx is deprecated. For now
1344    // we'll assume they're otherwise interchangable.
1345    int count = (single ? offset : (offset / 2));
1346    if (count == 0 || count > NumFloatV7ArchRegs)
1347        warn_once("Bad offset field for VFP load/store multiple.\n");
1348    if (count == 0) {
1349        // Force there to be at least one microop so the macroop makes sense.
1350        writeback = true;
1351    }
1352    if (count > NumFloatV7ArchRegs)
1353        count = NumFloatV7ArchRegs;
1354
1355    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1356    microOps = new StaticInstPtr[numMicroops];
1357
1358    int64_t addr = 0;
1359
1360    if (!up)
1361        addr = 4 * offset;
1362
1363    bool tempUp = up;
1364    for (int j = 0; j < count; j++) {
1365        if (load) {
1366            if (single) {
1367                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1368                                                  tempUp, addr);
1369            } else {
1370                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1371                                                    tempUp, addr);
1372                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1373                                                    addr + (up ? 4 : -4));
1374            }
1375        } else {
1376            if (single) {
1377                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1378                                                  tempUp, addr);
1379            } else {
1380                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1381                                                    tempUp, addr);
1382                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1383                                                    addr + (up ? 4 : -4));
1384            }
1385        }
1386        if (!tempUp) {
1387            addr -= (single ? 4 : 8);
1388            // The microops don't handle negative displacement, so turn if we
1389            // hit zero, flip polarity and start adding.
1390            if (addr <= 0) {
1391                tempUp = true;
1392                addr = -addr;
1393            }
1394        } else {
1395            addr += (single ? 4 : 8);
1396        }
1397    }
1398
1399    if (writeback) {
1400        if (up) {
1401            microOps[i++] =
1402                new MicroAddiUop(machInst, rn, rn, 4 * offset);
1403        } else {
1404            microOps[i++] =
1405                new MicroSubiUop(machInst, rn, rn, 4 * offset);
1406        }
1407    }
1408
1409    assert(numMicroops == i);
1410    microOps[numMicroops - 1]->setLastMicroop();
1411
1412    for (StaticInstPtr *curUop = microOps;
1413            !(*curUop)->isLastMicroop(); curUop++) {
1414        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1415        assert(uopPtr);
1416        uopPtr->setDelayedCommit();
1417    }
1418}
1419
1420std::string
1421MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1422{
1423    std::stringstream ss;
1424    printMnemonic(ss);
1425    printReg(ss, ura);
1426    ss << ", ";
1427    printReg(ss, urb);
1428    ss << ", ";
1429    ccprintf(ss, "#%d", imm);
1430    return ss.str();
1431}
1432
1433std::string
1434MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1435{
1436    std::stringstream ss;
1437    printMnemonic(ss);
1438    printReg(ss, ura);
1439    ss << ", ";
1440    printReg(ss, urb);
1441    ss << ", ";
1442    ccprintf(ss, "#%d", imm);
1443    return ss.str();
1444}
1445
1446std::string
1447MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1448{
1449    std::stringstream ss;
1450    printMnemonic(ss);
1451    ss << "[PC,CPSR]";
1452    return ss.str();
1453}
1454
1455std::string
1456MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1457{
1458    std::stringstream ss;
1459    printMnemonic(ss);
1460    printReg(ss, ura);
1461    ccprintf(ss, ", ");
1462    printReg(ss, urb);
1463    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1464    return ss.str();
1465}
1466
1467std::string
1468MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1469{
1470    std::stringstream ss;
1471    printMnemonic(ss);
1472    printReg(ss, ura);
1473    ss << ", ";
1474    printReg(ss, urb);
1475    return ss.str();
1476}
1477
1478std::string
1479MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1480{
1481    std::stringstream ss;
1482    printMnemonic(ss);
1483    printReg(ss, ura);
1484    ss << ", ";
1485    printReg(ss, urb);
1486    ss << ", ";
1487    printReg(ss, urc);
1488    return ss.str();
1489}
1490
1491std::string
1492MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1493{
1494    std::stringstream ss;
1495    printMnemonic(ss);
1496    if (isFloating())
1497        printReg(ss, ura + FP_Reg_Base);
1498    else
1499        printReg(ss, ura);
1500    ss << ", [";
1501    printReg(ss, urb);
1502    ss << ", ";
1503    ccprintf(ss, "#%d", imm);
1504    ss << "]";
1505    return ss.str();
1506}
1507
1508}
1509