macromem.cc revision 10037:5cac77888310
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include <sstream>
44
45#include "arch/arm/insts/macromem.hh"
46
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57                       OpClass __opClass, IntRegIndex rn,
58                       bool index, bool up, bool user, bool writeback,
59                       bool load, uint32_t reglist) :
60    PredMacroOp(mnem, machInst, __opClass)
61{
62    uint32_t regs = reglist;
63    uint32_t ones = number_of_ones(reglist);
64    // Remember that writeback adds a uop or two and the temp register adds one
65    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;
66
67    // It's technically legal to do a lot of nothing
68    if (!ones)
69        numMicroops = 1;
70
71    microOps = new StaticInstPtr[numMicroops];
72    uint32_t addr = 0;
73
74    if (!up)
75        addr = (ones << 2) - 4;
76
77    if (!index)
78        addr += 4;
79
80    StaticInstPtr *uop = microOps;
81
82    // Add 0 to Rn and stick it in ureg0.
83    // This is equivalent to a move.
84    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
85
86    unsigned reg = 0;
87    unsigned regIdx = 0;
88    bool force_user = user & !bits(reglist, 15);
89    bool exception_ret = user & bits(reglist, 15);
90
91    for (int i = 0; i < ones; i++) {
92        // Find the next register.
93        while (!bits(regs, reg))
94            reg++;
95        replaceBits(regs, reg, 0);
96
97        regIdx = reg;
98        if (force_user) {
99            regIdx = intRegInMode(MODE_USER, regIdx);
100        }
101
102        if (load) {
103            if (writeback && i == ones - 1) {
104                // If it's a writeback and this is the last register
105                // do the load into a temporary register which we'll move
106                // into the final one later
107                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
108                        up, addr);
109            } else {
110                // Otherwise just do it normally
111                if (reg == INTREG_PC && exception_ret) {
112                    // This must be the exception return form of ldm.
113                    *++uop = new MicroLdrRetUop(machInst, regIdx,
114                                               INTREG_UREG0, up, addr);
115                    if (!(condCode == COND_AL || condCode == COND_UC))
116                        (*uop)->setFlag(StaticInst::IsCondControl);
117                    else
118                        (*uop)->setFlag(StaticInst::IsUncondControl);
119                } else {
120                    *++uop = new MicroLdrUop(machInst, regIdx,
121                                            INTREG_UREG0, up, addr);
122                    if (reg == INTREG_PC) {
123                        (*uop)->setFlag(StaticInst::IsControl);
124                        if (!(condCode == COND_AL || condCode == COND_UC))
125                            (*uop)->setFlag(StaticInst::IsCondControl);
126                        else
127                            (*uop)->setFlag(StaticInst::IsUncondControl);
128                        (*uop)->setFlag(StaticInst::IsIndirectControl);
129                    }
130                }
131            }
132        } else {
133            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
134        }
135
136        if (up)
137            addr += 4;
138        else
139            addr -= 4;
140    }
141
142    if (writeback && ones) {
143        // put the register update after we're done all loading
144        if (up)
145            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
146        else
147            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);
148
149        // If this was a load move the last temporary value into place
150        // this way we can't take an exception after we update the base
151        // register.
152        if (load && reg == INTREG_PC && exception_ret) {
153            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
154            if (!(condCode == COND_AL || condCode == COND_UC))
155                (*uop)->setFlag(StaticInst::IsCondControl);
156            else
157                (*uop)->setFlag(StaticInst::IsUncondControl);
158        } else if (load) {
159            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
160            if (reg == INTREG_PC) {
161                (*uop)->setFlag(StaticInst::IsControl);
162                (*uop)->setFlag(StaticInst::IsCondControl);
163                (*uop)->setFlag(StaticInst::IsIndirectControl);
164                // This is created as a RAS POP
165                if (rn == INTREG_SP)
166                    (*uop)->setFlag(StaticInst::IsReturn);
167
168            }
169        }
170    }
171
172    (*uop)->setLastMicroop();
173
174    for (StaticInstPtr *curUop = microOps;
175            !(*curUop)->isLastMicroop(); curUop++) {
176        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
177        assert(uopPtr);
178        uopPtr->setDelayedCommit();
179    }
180}
181
182PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
183                     uint32_t size, bool fp, bool load, bool noAlloc,
184                     bool signExt, bool exclusive, bool acrel,
185                     int64_t imm, AddrMode mode,
186                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
187    PredMacroOp(mnem, machInst, __opClass)
188{
189    bool writeback = (mode != AddrMd_Offset);
190    numMicroops = 1 + (size / 4) + (writeback ? 1 : 0);
191    microOps = new StaticInstPtr[numMicroops];
192
193    StaticInstPtr *uop = microOps;
194
195    bool post = (mode == AddrMd_PostIndex);
196
197    rn = makeSP(rn);
198
199    *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm);
200
201    if (fp) {
202        if (size == 16) {
203            if (load) {
204                *++uop = new MicroLdrQBFpXImmUop(machInst, rt,
205                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
206                *++uop = new MicroLdrQTFpXImmUop(machInst, rt,
207                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
208                *++uop = new MicroLdrQBFpXImmUop(machInst, rt2,
209                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
210                *++uop = new MicroLdrQTFpXImmUop(machInst, rt2,
211                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
212            } else {
213                *++uop = new MicroStrQBFpXImmUop(machInst, rt,
214                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
215                *++uop = new MicroStrQTFpXImmUop(machInst, rt,
216                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
217                *++uop = new MicroStrQBFpXImmUop(machInst, rt2,
218                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
219                *++uop = new MicroStrQTFpXImmUop(machInst, rt2,
220                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
221            }
222        } else if (size == 8) {
223            if (load) {
224                *++uop = new MicroLdrFpXImmUop(machInst, rt,
225                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
226                *++uop = new MicroLdrFpXImmUop(machInst, rt2,
227                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
228            } else {
229                *++uop = new MicroStrFpXImmUop(machInst, rt,
230                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
231                *++uop = new MicroStrFpXImmUop(machInst, rt2,
232                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
233            }
234        } else if (size == 4) {
235            if (load) {
236                *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2,
237                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
238            } else {
239                *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2,
240                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
241            }
242        }
243    } else {
244        if (size == 8) {
245            if (load) {
246                *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0,
247                        0, noAlloc, exclusive, acrel);
248                *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0,
249                        size, noAlloc, exclusive, acrel);
250            } else {
251                *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0,
252                        0, noAlloc, exclusive, acrel);
253                *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0,
254                        size, noAlloc, exclusive, acrel);
255            }
256        } else if (size == 4) {
257            if (load) {
258                if (signExt) {
259                    *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2,
260                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
261                } else {
262                    *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2,
263                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
264                }
265            } else {
266                *++uop = new MicroStrDXImmUop(machInst, rt, rt2,
267                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
268            }
269        }
270    }
271
272    if (writeback) {
273        *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0,
274                                   post ? imm : 0);
275    }
276
277    (*uop)->setLastMicroop();
278
279    for (StaticInstPtr *curUop = microOps;
280            !(*curUop)->isLastMicroop(); curUop++) {
281        (*curUop)->setDelayedCommit();
282    }
283}
284
285BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
286                             OpClass __opClass, bool load, IntRegIndex dest,
287                             IntRegIndex base, int64_t imm) :
288    PredMacroOp(mnem, machInst, __opClass)
289{
290    numMicroops = 2;
291    microOps = new StaticInstPtr[numMicroops];
292
293    if (load) {
294        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
295        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
296    } else {
297        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
298        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
299    }
300    microOps[0]->setDelayedCommit();
301    microOps[1]->setLastMicroop();
302}
303
304BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
305                               OpClass __opClass, bool load, IntRegIndex dest,
306                               IntRegIndex base, int64_t imm) :
307    PredMacroOp(mnem, machInst, __opClass)
308{
309    numMicroops = 3;
310    microOps = new StaticInstPtr[numMicroops];
311
312    if (load) {
313        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0);
314        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0);
315    } else {
316        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
317        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
318    }
319    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
320
321    microOps[0]->setDelayedCommit();
322    microOps[1]->setDelayedCommit();
323    microOps[2]->setLastMicroop();
324}
325
326BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
327                             OpClass __opClass, bool load, IntRegIndex dest,
328                             IntRegIndex base, int64_t imm) :
329    PredMacroOp(mnem, machInst, __opClass)
330{
331    numMicroops = 3;
332    microOps = new StaticInstPtr[numMicroops];
333
334    if (load) {
335        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
336        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
337    } else {
338        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
339        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
340    }
341    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
342
343    microOps[0]->setDelayedCommit();
344    microOps[1]->setDelayedCommit();
345    microOps[2]->setLastMicroop();
346}
347
348BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
349                             OpClass __opClass, bool load, IntRegIndex dest,
350                             IntRegIndex base, IntRegIndex offset,
351                             ArmExtendType type, int64_t imm) :
352    PredMacroOp(mnem, machInst, __opClass)
353{
354    numMicroops = 2;
355    microOps = new StaticInstPtr[numMicroops];
356
357    if (load) {
358        microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base,
359                                              offset, type, imm);
360        microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base,
361                                              offset, type, imm);
362    } else {
363        microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base,
364                                              offset, type, imm);
365        microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base,
366                                              offset, type, imm);
367    }
368
369    microOps[0]->setDelayedCommit();
370    microOps[1]->setLastMicroop();
371}
372
373BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
374                             OpClass __opClass, IntRegIndex dest,
375                             int64_t imm) :
376    PredMacroOp(mnem, machInst, __opClass)
377{
378    numMicroops = 2;
379    microOps = new StaticInstPtr[numMicroops];
380
381    microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm);
382    microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm);
383
384    microOps[0]->setDelayedCommit();
385    microOps[1]->setLastMicroop();
386}
387
388VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
389                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
390                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
391    PredMacroOp(mnem, machInst, __opClass)
392{
393    assert(regs > 0 && regs <= 4);
394    assert(regs % elems == 0);
395
396    numMicroops = (regs > 2) ? 2 : 1;
397    bool wb = (rm != 15);
398    bool deinterleave = (elems > 1);
399
400    if (wb) numMicroops++;
401    if (deinterleave) numMicroops += (regs / elems);
402    microOps = new StaticInstPtr[numMicroops];
403
404    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
405
406    uint32_t noAlign = TLB::MustBeOne;
407
408    unsigned uopIdx = 0;
409    switch (regs) {
410      case 4:
411        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
412                size, machInst, rMid, rn, 0, align);
413        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
414                size, machInst, rMid + 4, rn, 16, noAlign);
415        break;
416      case 3:
417        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
418                size, machInst, rMid, rn, 0, align);
419        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
420                size, machInst, rMid + 4, rn, 16, noAlign);
421        break;
422      case 2:
423        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
424                size, machInst, rMid, rn, 0, align);
425        break;
426      case 1:
427        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
428                size, machInst, rMid, rn, 0, align);
429        break;
430      default:
431        // Unknown number of registers
432        microOps[uopIdx++] = new Unknown(machInst);
433    }
434    if (wb) {
435        if (rm != 15 && rm != 13) {
436            microOps[uopIdx++] =
437                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
438        } else {
439            microOps[uopIdx++] =
440                new MicroAddiUop(machInst, rn, rn, regs * 8);
441        }
442    }
443    if (deinterleave) {
444        switch (elems) {
445          case 4:
446            assert(regs == 4);
447            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
448                    size, machInst, vd * 2, rMid, inc * 2);
449            break;
450          case 3:
451            assert(regs == 3);
452            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
453                    size, machInst, vd * 2, rMid, inc * 2);
454            break;
455          case 2:
456            assert(regs == 4 || regs == 2);
457            if (regs == 4) {
458                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
459                        size, machInst, vd * 2, rMid, inc * 2);
460                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
461                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
462            } else {
463                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
464                        size, machInst, vd * 2, rMid, inc * 2);
465            }
466            break;
467          default:
468            // Bad number of elements to deinterleave
469            microOps[uopIdx++] = new Unknown(machInst);
470        }
471    }
472    assert(uopIdx == numMicroops);
473
474    for (unsigned i = 0; i < numMicroops - 1; i++) {
475        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
476        assert(uopPtr);
477        uopPtr->setDelayedCommit();
478    }
479    microOps[numMicroops - 1]->setLastMicroop();
480}
481
482VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
483                         OpClass __opClass, bool all, unsigned elems,
484                         RegIndex rn, RegIndex vd, unsigned regs,
485                         unsigned inc, uint32_t size, uint32_t align,
486                         RegIndex rm, unsigned lane) :
487    PredMacroOp(mnem, machInst, __opClass)
488{
489    assert(regs > 0 && regs <= 4);
490    assert(regs % elems == 0);
491
492    unsigned eBytes = (1 << size);
493    unsigned loadSize = eBytes * elems;
494    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
495                        sizeof(FloatRegBits);
496
497    assert(loadRegs > 0 && loadRegs <= 4);
498
499    numMicroops = 1;
500    bool wb = (rm != 15);
501
502    if (wb) numMicroops++;
503    numMicroops += (regs / elems);
504    microOps = new StaticInstPtr[numMicroops];
505
506    RegIndex ufp0 = NumFloatV7ArchRegs;
507
508    unsigned uopIdx = 0;
509    switch (loadSize) {
510      case 1:
511        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
512                machInst, ufp0, rn, 0, align);
513        break;
514      case 2:
515        if (eBytes == 2) {
516            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
517                    machInst, ufp0, rn, 0, align);
518        } else {
519            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
520                    machInst, ufp0, rn, 0, align);
521        }
522        break;
523      case 3:
524        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
525                machInst, ufp0, rn, 0, align);
526        break;
527      case 4:
528        switch (eBytes) {
529          case 1:
530            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
531                    machInst, ufp0, rn, 0, align);
532            break;
533          case 2:
534            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
535                    machInst, ufp0, rn, 0, align);
536            break;
537          case 4:
538            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
539                    machInst, ufp0, rn, 0, align);
540            break;
541        }
542        break;
543      case 6:
544        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
545                machInst, ufp0, rn, 0, align);
546        break;
547      case 8:
548        switch (eBytes) {
549          case 2:
550            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
551                    machInst, ufp0, rn, 0, align);
552            break;
553          case 4:
554            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
555                    machInst, ufp0, rn, 0, align);
556            break;
557        }
558        break;
559      case 12:
560        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
561                machInst, ufp0, rn, 0, align);
562        break;
563      case 16:
564        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
565                machInst, ufp0, rn, 0, align);
566        break;
567      default:
568        // Unrecognized load size
569        microOps[uopIdx++] = new Unknown(machInst);
570    }
571    if (wb) {
572        if (rm != 15 && rm != 13) {
573            microOps[uopIdx++] =
574                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
575        } else {
576            microOps[uopIdx++] =
577                new MicroAddiUop(machInst, rn, rn, loadSize);
578        }
579    }
580    switch (elems) {
581      case 4:
582        assert(regs == 4);
583        switch (size) {
584          case 0:
585            if (all) {
586                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
587                        machInst, vd * 2, ufp0, inc * 2);
588            } else {
589                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
590                        machInst, vd * 2, ufp0, inc * 2, lane);
591            }
592            break;
593          case 1:
594            if (all) {
595                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
596                        machInst, vd * 2, ufp0, inc * 2);
597            } else {
598                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
599                        machInst, vd * 2, ufp0, inc * 2, lane);
600            }
601            break;
602          case 2:
603            if (all) {
604                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
605                        machInst, vd * 2, ufp0, inc * 2);
606            } else {
607                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
608                        machInst, vd * 2, ufp0, inc * 2, lane);
609            }
610            break;
611          default:
612            // Bad size
613            microOps[uopIdx++] = new Unknown(machInst);
614            break;
615        }
616        break;
617      case 3:
618        assert(regs == 3);
619        switch (size) {
620          case 0:
621            if (all) {
622                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
623                        machInst, vd * 2, ufp0, inc * 2);
624            } else {
625                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
626                        machInst, vd * 2, ufp0, inc * 2, lane);
627            }
628            break;
629          case 1:
630            if (all) {
631                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
632                        machInst, vd * 2, ufp0, inc * 2);
633            } else {
634                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
635                        machInst, vd * 2, ufp0, inc * 2, lane);
636            }
637            break;
638          case 2:
639            if (all) {
640                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
641                        machInst, vd * 2, ufp0, inc * 2);
642            } else {
643                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
644                        machInst, vd * 2, ufp0, inc * 2, lane);
645            }
646            break;
647          default:
648            // Bad size
649            microOps[uopIdx++] = new Unknown(machInst);
650            break;
651        }
652        break;
653      case 2:
654        assert(regs == 2);
655        assert(loadRegs <= 2);
656        switch (size) {
657          case 0:
658            if (all) {
659                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
660                        machInst, vd * 2, ufp0, inc * 2);
661            } else {
662                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
663                        machInst, vd * 2, ufp0, inc * 2, lane);
664            }
665            break;
666          case 1:
667            if (all) {
668                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
669                        machInst, vd * 2, ufp0, inc * 2);
670            } else {
671                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
672                        machInst, vd * 2, ufp0, inc * 2, lane);
673            }
674            break;
675          case 2:
676            if (all) {
677                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
678                        machInst, vd * 2, ufp0, inc * 2);
679            } else {
680                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
681                        machInst, vd * 2, ufp0, inc * 2, lane);
682            }
683            break;
684          default:
685            // Bad size
686            microOps[uopIdx++] = new Unknown(machInst);
687            break;
688        }
689        break;
690      case 1:
691        assert(regs == 1 || (all && regs == 2));
692        assert(loadRegs <= 2);
693        for (unsigned offset = 0; offset < regs; offset++) {
694            switch (size) {
695              case 0:
696                if (all) {
697                    microOps[uopIdx++] =
698                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
699                            machInst, (vd + offset) * 2, ufp0, inc * 2);
700                } else {
701                    microOps[uopIdx++] =
702                        new MicroUnpackNeon2to2Uop<uint8_t>(
703                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
704                }
705                break;
706              case 1:
707                if (all) {
708                    microOps[uopIdx++] =
709                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
710                            machInst, (vd + offset) * 2, ufp0, inc * 2);
711                } else {
712                    microOps[uopIdx++] =
713                        new MicroUnpackNeon2to2Uop<uint16_t>(
714                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
715                }
716                break;
717              case 2:
718                if (all) {
719                    microOps[uopIdx++] =
720                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
721                            machInst, (vd + offset) * 2, ufp0, inc * 2);
722                } else {
723                    microOps[uopIdx++] =
724                        new MicroUnpackNeon2to2Uop<uint32_t>(
725                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
726                }
727                break;
728              default:
729                // Bad size
730                microOps[uopIdx++] = new Unknown(machInst);
731                break;
732            }
733        }
734        break;
735      default:
736        // Bad number of elements to unpack
737        microOps[uopIdx++] = new Unknown(machInst);
738    }
739    assert(uopIdx == numMicroops);
740
741    for (unsigned i = 0; i < numMicroops - 1; i++) {
742        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
743        assert(uopPtr);
744        uopPtr->setDelayedCommit();
745    }
746    microOps[numMicroops - 1]->setLastMicroop();
747}
748
749VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
750                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
751                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
752    PredMacroOp(mnem, machInst, __opClass)
753{
754    assert(regs > 0 && regs <= 4);
755    assert(regs % elems == 0);
756
757    numMicroops = (regs > 2) ? 2 : 1;
758    bool wb = (rm != 15);
759    bool interleave = (elems > 1);
760
761    if (wb) numMicroops++;
762    if (interleave) numMicroops += (regs / elems);
763    microOps = new StaticInstPtr[numMicroops];
764
765    uint32_t noAlign = TLB::MustBeOne;
766
767    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
768
769    unsigned uopIdx = 0;
770    if (interleave) {
771        switch (elems) {
772          case 4:
773            assert(regs == 4);
774            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
775                    size, machInst, rMid, vd * 2, inc * 2);
776            break;
777          case 3:
778            assert(regs == 3);
779            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
780                    size, machInst, rMid, vd * 2, inc * 2);
781            break;
782          case 2:
783            assert(regs == 4 || regs == 2);
784            if (regs == 4) {
785                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
786                        size, machInst, rMid, vd * 2, inc * 2);
787                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
788                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
789            } else {
790                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
791                        size, machInst, rMid, vd * 2, inc * 2);
792            }
793            break;
794          default:
795            // Bad number of elements to interleave
796            microOps[uopIdx++] = new Unknown(machInst);
797        }
798    }
799    switch (regs) {
800      case 4:
801        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
802                size, machInst, rMid, rn, 0, align);
803        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
804                size, machInst, rMid + 4, rn, 16, noAlign);
805        break;
806      case 3:
807        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
808                size, machInst, rMid, rn, 0, align);
809        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
810                size, machInst, rMid + 4, rn, 16, noAlign);
811        break;
812      case 2:
813        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
814                size, machInst, rMid, rn, 0, align);
815        break;
816      case 1:
817        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
818                size, machInst, rMid, rn, 0, align);
819        break;
820      default:
821        // Unknown number of registers
822        microOps[uopIdx++] = new Unknown(machInst);
823    }
824    if (wb) {
825        if (rm != 15 && rm != 13) {
826            microOps[uopIdx++] =
827                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
828        } else {
829            microOps[uopIdx++] =
830                new MicroAddiUop(machInst, rn, rn, regs * 8);
831        }
832    }
833    assert(uopIdx == numMicroops);
834
835    for (unsigned i = 0; i < numMicroops - 1; i++) {
836        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
837        assert(uopPtr);
838        uopPtr->setDelayedCommit();
839    }
840    microOps[numMicroops - 1]->setLastMicroop();
841}
842
843VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
844                         OpClass __opClass, bool all, unsigned elems,
845                         RegIndex rn, RegIndex vd, unsigned regs,
846                         unsigned inc, uint32_t size, uint32_t align,
847                         RegIndex rm, unsigned lane) :
848    PredMacroOp(mnem, machInst, __opClass)
849{
850    assert(!all);
851    assert(regs > 0 && regs <= 4);
852    assert(regs % elems == 0);
853
854    unsigned eBytes = (1 << size);
855    unsigned storeSize = eBytes * elems;
856    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
857                         sizeof(FloatRegBits);
858
859    assert(storeRegs > 0 && storeRegs <= 4);
860
861    numMicroops = 1;
862    bool wb = (rm != 15);
863
864    if (wb) numMicroops++;
865    numMicroops += (regs / elems);
866    microOps = new StaticInstPtr[numMicroops];
867
868    RegIndex ufp0 = NumFloatV7ArchRegs;
869
870    unsigned uopIdx = 0;
871    switch (elems) {
872      case 4:
873        assert(regs == 4);
874        switch (size) {
875          case 0:
876            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
877                    machInst, ufp0, vd * 2, inc * 2, lane);
878            break;
879          case 1:
880            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
881                    machInst, ufp0, vd * 2, inc * 2, lane);
882            break;
883          case 2:
884            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
885                    machInst, ufp0, vd * 2, inc * 2, lane);
886            break;
887          default:
888            // Bad size
889            microOps[uopIdx++] = new Unknown(machInst);
890            break;
891        }
892        break;
893      case 3:
894        assert(regs == 3);
895        switch (size) {
896          case 0:
897            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
898                    machInst, ufp0, vd * 2, inc * 2, lane);
899            break;
900          case 1:
901            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
902                    machInst, ufp0, vd * 2, inc * 2, lane);
903            break;
904          case 2:
905            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
906                    machInst, ufp0, vd * 2, inc * 2, lane);
907            break;
908          default:
909            // Bad size
910            microOps[uopIdx++] = new Unknown(machInst);
911            break;
912        }
913        break;
914      case 2:
915        assert(regs == 2);
916        assert(storeRegs <= 2);
917        switch (size) {
918          case 0:
919            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
920                    machInst, ufp0, vd * 2, inc * 2, lane);
921            break;
922          case 1:
923            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
924                    machInst, ufp0, vd * 2, inc * 2, lane);
925            break;
926          case 2:
927            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
928                    machInst, ufp0, vd * 2, inc * 2, lane);
929            break;
930          default:
931            // Bad size
932            microOps[uopIdx++] = new Unknown(machInst);
933            break;
934        }
935        break;
936      case 1:
937        assert(regs == 1 || (all && regs == 2));
938        assert(storeRegs <= 2);
939        for (unsigned offset = 0; offset < regs; offset++) {
940            switch (size) {
941              case 0:
942                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
943                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
944                break;
945              case 1:
946                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
947                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
948                break;
949              case 2:
950                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
951                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
952                break;
953              default:
954                // Bad size
955                microOps[uopIdx++] = new Unknown(machInst);
956                break;
957            }
958        }
959        break;
960      default:
961        // Bad number of elements to unpack
962        microOps[uopIdx++] = new Unknown(machInst);
963    }
964    switch (storeSize) {
965      case 1:
966        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
967                machInst, ufp0, rn, 0, align);
968        break;
969      case 2:
970        if (eBytes == 2) {
971            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
972                    machInst, ufp0, rn, 0, align);
973        } else {
974            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
975                    machInst, ufp0, rn, 0, align);
976        }
977        break;
978      case 3:
979        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
980                machInst, ufp0, rn, 0, align);
981        break;
982      case 4:
983        switch (eBytes) {
984          case 1:
985            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
986                    machInst, ufp0, rn, 0, align);
987            break;
988          case 2:
989            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
990                    machInst, ufp0, rn, 0, align);
991            break;
992          case 4:
993            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
994                    machInst, ufp0, rn, 0, align);
995            break;
996        }
997        break;
998      case 6:
999        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1000                machInst, ufp0, rn, 0, align);
1001        break;
1002      case 8:
1003        switch (eBytes) {
1004          case 2:
1005            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1006                    machInst, ufp0, rn, 0, align);
1007            break;
1008          case 4:
1009            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1010                    machInst, ufp0, rn, 0, align);
1011            break;
1012        }
1013        break;
1014      case 12:
1015        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1016                machInst, ufp0, rn, 0, align);
1017        break;
1018      case 16:
1019        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1020                machInst, ufp0, rn, 0, align);
1021        break;
1022      default:
1023        // Bad store size
1024        microOps[uopIdx++] = new Unknown(machInst);
1025    }
1026    if (wb) {
1027        if (rm != 15 && rm != 13) {
1028            microOps[uopIdx++] =
1029                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1030        } else {
1031            microOps[uopIdx++] =
1032                new MicroAddiUop(machInst, rn, rn, storeSize);
1033        }
1034    }
1035    assert(uopIdx == numMicroops);
1036
1037    for (unsigned i = 0; i < numMicroops - 1; i++) {
1038        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1039        assert(uopPtr);
1040        uopPtr->setDelayedCommit();
1041    }
1042    microOps[numMicroops - 1]->setLastMicroop();
1043}
1044
1045VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1046                         OpClass __opClass, RegIndex rn, RegIndex vd,
1047                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1048                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1049    PredMacroOp(mnem, machInst, __opClass)
1050{
1051    RegIndex vx = NumFloatV8ArchRegs / 4;
1052    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1053    bool baseIsSP = isSP((IntRegIndex) rnsp);
1054
1055    numMicroops = wb ? 1 : 0;
1056
1057    int totNumBytes = numRegs * dataSize / 8;
1058    assert(totNumBytes <= 64);
1059
1060    // The guiding principle here is that no more than 16 bytes can be
1061    // transferred at a time
1062    int numMemMicroops = totNumBytes / 16;
1063    int residuum = totNumBytes % 16;
1064    if (residuum)
1065        ++numMemMicroops;
1066    numMicroops += numMemMicroops;
1067
1068    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1069    numMicroops += numMarshalMicroops;
1070
1071    microOps = new StaticInstPtr[numMicroops];
1072    unsigned uopIdx = 0;
1073    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1074        TLB::AllowUnaligned;
1075
1076    int i = 0;
1077    for(; i < numMemMicroops - 1; ++i) {
1078        microOps[uopIdx++] = new MicroNeonLoad64(
1079            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1080            baseIsSP, 16 /* accSize */, eSize);
1081    }
1082    microOps[uopIdx++] =  new MicroNeonLoad64(
1083        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1084        residuum ? residuum : 16 /* accSize */, eSize);
1085
1086    // Writeback microop: the post-increment amount is encoded in "Rm": a
1087    // 64-bit general register OR as '11111' for an immediate value equal to
1088    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1089    if (wb) {
1090        if (rm != ((RegIndex) INTREG_X31)) {
1091            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1092                                                      UXTX, 0);
1093        } else {
1094            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1095                                                   totNumBytes);
1096        }
1097    }
1098
1099    for (int i = 0; i < numMarshalMicroops; ++i) {
1100        microOps[uopIdx++] = new MicroDeintNeon64(
1101            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1102            numStructElems, numRegs, i /* step */);
1103    }
1104
1105    assert(uopIdx == numMicroops);
1106
1107    for (int i = 0; i < numMicroops - 1; ++i) {
1108        microOps[i]->setDelayedCommit();
1109    }
1110    microOps[numMicroops - 1]->setLastMicroop();
1111}
1112
1113VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1114                         OpClass __opClass, RegIndex rn, RegIndex vd,
1115                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1116                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1117    PredMacroOp(mnem, machInst, __opClass)
1118{
1119    RegIndex vx = NumFloatV8ArchRegs / 4;
1120    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1121    bool baseIsSP = isSP((IntRegIndex) rnsp);
1122
1123    numMicroops = wb ? 1 : 0;
1124
1125    int totNumBytes = numRegs * dataSize / 8;
1126    assert(totNumBytes <= 64);
1127
1128    // The guiding principle here is that no more than 16 bytes can be
1129    // transferred at a time
1130    int numMemMicroops = totNumBytes / 16;
1131    int residuum = totNumBytes % 16;
1132    if (residuum)
1133        ++numMemMicroops;
1134    numMicroops += numMemMicroops;
1135
1136    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1137    numMicroops += numMarshalMicroops;
1138
1139    microOps = new StaticInstPtr[numMicroops];
1140    unsigned uopIdx = 0;
1141
1142    for(int i = 0; i < numMarshalMicroops; ++i) {
1143        microOps[uopIdx++] = new MicroIntNeon64(
1144            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1145            numStructElems, numRegs, i /* step */);
1146    }
1147
1148    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149        TLB::AllowUnaligned;
1150
1151    int i = 0;
1152    for(; i < numMemMicroops - 1; ++i) {
1153        microOps[uopIdx++] = new MicroNeonStore64(
1154            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155            baseIsSP, 16 /* accSize */, eSize);
1156    }
1157    microOps[uopIdx++] = new MicroNeonStore64(
1158        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159        residuum ? residuum : 16 /* accSize */, eSize);
1160
1161    // Writeback microop: the post-increment amount is encoded in "Rm": a
1162    // 64-bit general register OR as '11111' for an immediate value equal to
1163    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164    if (wb) {
1165        if (rm != ((RegIndex) INTREG_X31)) {
1166            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167                                                      UXTX, 0);
1168        } else {
1169            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170                                                   totNumBytes);
1171        }
1172    }
1173
1174    assert(uopIdx == numMicroops);
1175
1176    for (int i = 0; i < numMicroops - 1; i++) {
1177        microOps[i]->setDelayedCommit();
1178    }
1179    microOps[numMicroops - 1]->setLastMicroop();
1180}
1181
1182VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1183                             OpClass __opClass, RegIndex rn, RegIndex vd,
1184                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1185                             uint8_t numStructElems, uint8_t index, bool wb,
1186                             bool replicate) :
1187    PredMacroOp(mnem, machInst, __opClass)
1188{
1189    RegIndex vx = NumFloatV8ArchRegs / 4;
1190    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1191    bool baseIsSP = isSP((IntRegIndex) rnsp);
1192
1193    numMicroops = wb ? 1 : 0;
1194
1195    int eSizeBytes = 1 << eSize;
1196    int totNumBytes = numStructElems * eSizeBytes;
1197    assert(totNumBytes <= 64);
1198
1199    // The guiding principle here is that no more than 16 bytes can be
1200    // transferred at a time
1201    int numMemMicroops = totNumBytes / 16;
1202    int residuum = totNumBytes % 16;
1203    if (residuum)
1204        ++numMemMicroops;
1205    numMicroops += numMemMicroops;
1206
1207    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1208    numMicroops += numMarshalMicroops;
1209
1210    microOps = new StaticInstPtr[numMicroops];
1211    unsigned uopIdx = 0;
1212
1213    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1214        TLB::AllowUnaligned;
1215
1216    int i = 0;
1217    for (; i < numMemMicroops - 1; ++i) {
1218        microOps[uopIdx++] = new MicroNeonLoad64(
1219            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1220            baseIsSP, 16 /* accSize */, eSize);
1221    }
1222    microOps[uopIdx++] = new MicroNeonLoad64(
1223        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1224        residuum ? residuum : 16 /* accSize */, eSize);
1225
1226    // Writeback microop: the post-increment amount is encoded in "Rm": a
1227    // 64-bit general register OR as '11111' for an immediate value equal to
1228    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1229    if (wb) {
1230        if (rm != ((RegIndex) INTREG_X31)) {
1231            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1232                                                      UXTX, 0);
1233        } else {
1234            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1235                                                   totNumBytes);
1236        }
1237    }
1238
1239    for(int i = 0; i < numMarshalMicroops; ++i) {
1240        microOps[uopIdx++] = new MicroUnpackNeon64(
1241            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1242            numStructElems, index, i /* step */, replicate);
1243    }
1244
1245    assert(uopIdx == numMicroops);
1246
1247    for (int i = 0; i < numMicroops - 1; i++) {
1248        microOps[i]->setDelayedCommit();
1249    }
1250    microOps[numMicroops - 1]->setLastMicroop();
1251}
1252
1253VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1254                             OpClass __opClass, RegIndex rn, RegIndex vd,
1255                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1256                             uint8_t numStructElems, uint8_t index, bool wb,
1257                             bool replicate) :
1258    PredMacroOp(mnem, machInst, __opClass)
1259{
1260    RegIndex vx = NumFloatV8ArchRegs / 4;
1261    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1262    bool baseIsSP = isSP((IntRegIndex) rnsp);
1263
1264    numMicroops = wb ? 1 : 0;
1265
1266    int eSizeBytes = 1 << eSize;
1267    int totNumBytes = numStructElems * eSizeBytes;
1268    assert(totNumBytes <= 64);
1269
1270    // The guiding principle here is that no more than 16 bytes can be
1271    // transferred at a time
1272    int numMemMicroops = totNumBytes / 16;
1273    int residuum = totNumBytes % 16;
1274    if (residuum)
1275        ++numMemMicroops;
1276    numMicroops += numMemMicroops;
1277
1278    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1279    numMicroops += numMarshalMicroops;
1280
1281    microOps = new StaticInstPtr[numMicroops];
1282    unsigned uopIdx = 0;
1283
1284    for(int i = 0; i < numMarshalMicroops; ++i) {
1285        microOps[uopIdx++] = new MicroPackNeon64(
1286            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1287            numStructElems, index, i /* step */, replicate);
1288    }
1289
1290    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1291        TLB::AllowUnaligned;
1292
1293    int i = 0;
1294    for(; i < numMemMicroops - 1; ++i) {
1295        microOps[uopIdx++] = new MicroNeonStore64(
1296            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1297            baseIsSP, 16 /* accsize */, eSize);
1298    }
1299    microOps[uopIdx++] = new MicroNeonStore64(
1300        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1301        residuum ? residuum : 16 /* accSize */, eSize);
1302
1303    // Writeback microop: the post-increment amount is encoded in "Rm": a
1304    // 64-bit general register OR as '11111' for an immediate value equal to
1305    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1306    if (wb) {
1307        if (rm != ((RegIndex) INTREG_X31)) {
1308            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1309                                                      UXTX, 0);
1310        } else {
1311            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1312                                                   totNumBytes);
1313        }
1314    }
1315
1316    assert(uopIdx == numMicroops);
1317
1318    for (int i = 0; i < numMicroops - 1; i++) {
1319        microOps[i]->setDelayedCommit();
1320    }
1321    microOps[numMicroops - 1]->setLastMicroop();
1322}
1323
1324MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1325                             OpClass __opClass, IntRegIndex rn,
1326                             RegIndex vd, bool single, bool up,
1327                             bool writeback, bool load, uint32_t offset) :
1328    PredMacroOp(mnem, machInst, __opClass)
1329{
1330    int i = 0;
1331
1332    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1333    // to be functionally identical except that fldmx is deprecated. For now
1334    // we'll assume they're otherwise interchangable.
1335    int count = (single ? offset : (offset / 2));
1336    if (count == 0 || count > NumFloatV7ArchRegs)
1337        warn_once("Bad offset field for VFP load/store multiple.\n");
1338    if (count == 0) {
1339        // Force there to be at least one microop so the macroop makes sense.
1340        writeback = true;
1341    }
1342    if (count > NumFloatV7ArchRegs)
1343        count = NumFloatV7ArchRegs;
1344
1345    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1346    microOps = new StaticInstPtr[numMicroops];
1347
1348    int64_t addr = 0;
1349
1350    if (!up)
1351        addr = 4 * offset;
1352
1353    bool tempUp = up;
1354    for (int j = 0; j < count; j++) {
1355        if (load) {
1356            if (single) {
1357                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1358                                                  tempUp, addr);
1359            } else {
1360                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1361                                                    tempUp, addr);
1362                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1363                                                    addr + (up ? 4 : -4));
1364            }
1365        } else {
1366            if (single) {
1367                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1368                                                  tempUp, addr);
1369            } else {
1370                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1371                                                    tempUp, addr);
1372                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1373                                                    addr + (up ? 4 : -4));
1374            }
1375        }
1376        if (!tempUp) {
1377            addr -= (single ? 4 : 8);
1378            // The microops don't handle negative displacement, so turn if we
1379            // hit zero, flip polarity and start adding.
1380            if (addr <= 0) {
1381                tempUp = true;
1382                addr = -addr;
1383            }
1384        } else {
1385            addr += (single ? 4 : 8);
1386        }
1387    }
1388
1389    if (writeback) {
1390        if (up) {
1391            microOps[i++] =
1392                new MicroAddiUop(machInst, rn, rn, 4 * offset);
1393        } else {
1394            microOps[i++] =
1395                new MicroSubiUop(machInst, rn, rn, 4 * offset);
1396        }
1397    }
1398
1399    assert(numMicroops == i);
1400    microOps[numMicroops - 1]->setLastMicroop();
1401
1402    for (StaticInstPtr *curUop = microOps;
1403            !(*curUop)->isLastMicroop(); curUop++) {
1404        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1405        assert(uopPtr);
1406        uopPtr->setDelayedCommit();
1407    }
1408}
1409
1410std::string
1411MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1412{
1413    std::stringstream ss;
1414    printMnemonic(ss);
1415    printReg(ss, ura);
1416    ss << ", ";
1417    printReg(ss, urb);
1418    ss << ", ";
1419    ccprintf(ss, "#%d", imm);
1420    return ss.str();
1421}
1422
1423std::string
1424MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1425{
1426    std::stringstream ss;
1427    printMnemonic(ss);
1428    printReg(ss, ura);
1429    ss << ", ";
1430    printReg(ss, urb);
1431    ss << ", ";
1432    ccprintf(ss, "#%d", imm);
1433    return ss.str();
1434}
1435
1436std::string
1437MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1438{
1439    std::stringstream ss;
1440    printMnemonic(ss);
1441    ss << "[PC,CPSR]";
1442    return ss.str();
1443}
1444
1445std::string
1446MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1447{
1448    std::stringstream ss;
1449    printMnemonic(ss);
1450    printReg(ss, ura);
1451    ccprintf(ss, ", ");
1452    printReg(ss, urb);
1453    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1454    return ss.str();
1455}
1456
1457std::string
1458MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1459{
1460    std::stringstream ss;
1461    printMnemonic(ss);
1462    printReg(ss, ura);
1463    ss << ", ";
1464    printReg(ss, urb);
1465    return ss.str();
1466}
1467
1468std::string
1469MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1470{
1471    std::stringstream ss;
1472    printMnemonic(ss);
1473    printReg(ss, ura);
1474    ss << ", ";
1475    printReg(ss, urb);
1476    ss << ", ";
1477    printReg(ss, urc);
1478    return ss.str();
1479}
1480
1481std::string
1482MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1483{
1484    std::stringstream ss;
1485    printMnemonic(ss);
1486    printReg(ss, ura);
1487    ss << ", [";
1488    printReg(ss, urb);
1489    ss << ", ";
1490    ccprintf(ss, "#%d", imm);
1491    ss << "]";
1492    return ss.str();
1493}
1494
1495}
1496