macromem.cc revision 10346:d96b61d843b2
/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Stephen Hines
 */
42
43#include <sstream>
44
45#include "arch/arm/insts/macromem.hh"
46
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57                       OpClass __opClass, IntRegIndex rn,
58                       bool index, bool up, bool user, bool writeback,
59                       bool load, uint32_t reglist) :
60    PredMacroOp(mnem, machInst, __opClass)
61{
62    uint32_t regs = reglist;
63    uint32_t ones = number_of_ones(reglist);
64    uint32_t mem_ops = ones;
65
66    // Copy the base address register if we overwrite it, or if this instruction
67    // is basically a no-op (we have to do something)
68    bool copy_base =  (bits(reglist, rn) && load) || !ones;
69    bool force_user = user & !bits(reglist, 15);
70    bool exception_ret = user & bits(reglist, 15);
71    bool pc_temp = load && writeback && bits(reglist, 15);
72
73    if (!ones) {
74        numMicroops = 1;
75    } else if (load) {
76        numMicroops = ((ones + 1) / 2)
77                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
78                    + (copy_base ? 1 : 0)
79                    + (writeback? 1 : 0)
80                    + (pc_temp ? 1 : 0);
81    } else {
82        numMicroops = ones + (writeback ? 1 : 0);
83    }
84
85    microOps = new StaticInstPtr[numMicroops];
86
87    uint32_t addr = 0;
88
89    if (!up)
90        addr = (ones << 2) - 4;
91
92    if (!index)
93        addr += 4;
94
95    StaticInstPtr *uop = microOps;
96
97    // Add 0 to Rn and stick it in ureg0.
98    // This is equivalent to a move.
99    if (copy_base)
100        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
101
102    unsigned reg = 0;
103    while (mem_ops != 0) {
104        // Do load operations in pairs if possible
105        if (load && mem_ops >= 2 &&
106            !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
107            // 64-bit memory operation
108            // Find 2 set register bits (clear them after finding)
109            unsigned reg_idx1;
110            unsigned reg_idx2;
111
112            // Find the first register
113            while (!bits(regs, reg)) reg++;
114            replaceBits(regs, reg, 0);
115            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
116
117            // Find the second register
118            while (!bits(regs, reg)) reg++;
119            replaceBits(regs, reg, 0);
120            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
121
122            // Load into temp reg if necessary
123            if (reg_idx2 == INTREG_PC && pc_temp)
124                reg_idx2 = INTREG_UREG1;
125
126            // Actually load both registers from memory
127            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
128                    copy_base ? INTREG_UREG0 : rn, up, addr);
129
130            if (!writeback && reg_idx2 == INTREG_PC) {
131                // No writeback if idx==pc, set appropriate flags
132                (*uop)->setFlag(StaticInst::IsControl);
133                (*uop)->setFlag(StaticInst::IsIndirectControl);
134
135                if (!(condCode == COND_AL || condCode == COND_UC))
136                    (*uop)->setFlag(StaticInst::IsCondControl);
137                else
138                    (*uop)->setFlag(StaticInst::IsUncondControl);
139            }
140
141            if (up) addr += 8;
142            else addr -= 8;
143            mem_ops -= 2;
144        } else {
145            // 32-bit memory operation
146            // Find register for operation
147            unsigned reg_idx;
148            while(!bits(regs, reg)) reg++;
149            replaceBits(regs, reg, 0);
150            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
151
152            if (load) {
153                if (writeback && reg_idx == INTREG_PC) {
154                    // If this instruction changes the PC and performs a
155                    // writeback, ensure the pc load/branch is the last uop.
156                    // Load into a temp reg here.
157                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
158                            copy_base ? INTREG_UREG0 : rn, up, addr);
159                } else if (reg_idx == INTREG_PC && exception_ret) {
160                    // Special handling for exception return
161                    *uop = new MicroLdrRetUop(machInst, reg_idx,
162                            copy_base ? INTREG_UREG0 : rn, up, addr);
163                } else {
164                    // standard single load uop
165                    *uop = new MicroLdrUop(machInst, reg_idx,
166                            copy_base ? INTREG_UREG0 : rn, up, addr);
167                }
168
169                // Loading pc as last operation?  Set appropriate flags.
170                if (!writeback && reg_idx == INTREG_PC) {
171                    (*uop)->setFlag(StaticInst::IsControl);
172                    (*uop)->setFlag(StaticInst::IsIndirectControl);
173
174                    if (!(condCode == COND_AL || condCode == COND_UC))
175                        (*uop)->setFlag(StaticInst::IsCondControl);
176                    else
177                        (*uop)->setFlag(StaticInst::IsUncondControl);
178                }
179            } else {
180                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
181            }
182
183            if (up) addr += 4;
184            else addr -= 4;
185            --mem_ops;
186        }
187
188        // Load/store micro-op generated, go to next uop
189        ++uop;
190    }
191
192    if (writeback && ones) {
193        // Perform writeback uop operation
194        if (up)
195            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
196        else
197            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
198
199        // Write PC after address writeback?
200        if (pc_temp) {
201            if (exception_ret) {
202                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
203            } else {
204                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
205            }
206            (*uop)->setFlag(StaticInst::IsControl);
207            (*uop)->setFlag(StaticInst::IsIndirectControl);
208
209            if (!(condCode == COND_AL || condCode == COND_UC))
210                (*uop)->setFlag(StaticInst::IsCondControl);
211            else
212                (*uop)->setFlag(StaticInst::IsUncondControl);
213
214            if (rn == INTREG_SP)
215                (*uop)->setFlag(StaticInst::IsReturn);
216
217            ++uop;
218        }
219    }
220
221    --uop;
222    (*uop)->setLastMicroop();
223
224    /* Take the control flags from the last microop for the macroop */
225    if ((*uop)->isControl())
226        setFlag(StaticInst::IsControl);
227    if ((*uop)->isCondCtrl())
228        setFlag(StaticInst::IsCondControl);
229    if ((*uop)->isUncondCtrl())
230        setFlag(StaticInst::IsUncondControl);
231    if ((*uop)->isIndirectCtrl())
232        setFlag(StaticInst::IsIndirectControl);
233    if ((*uop)->isReturn())
234        setFlag(StaticInst::IsReturn);
235
236    for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
237        (*uop)->setDelayedCommit();
238    }
239}
240
241PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
242                     uint32_t size, bool fp, bool load, bool noAlloc,
243                     bool signExt, bool exclusive, bool acrel,
244                     int64_t imm, AddrMode mode,
245                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
246    PredMacroOp(mnem, machInst, __opClass)
247{
248    bool post = (mode == AddrMd_PostIndex);
249    bool writeback = (mode != AddrMd_Offset);
250
251    if (load) {
252        // Use integer rounding to round up loads of size 4
253        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
254    } else {
255        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
256    }
257    microOps = new StaticInstPtr[numMicroops];
258
259    StaticInstPtr *uop = microOps;
260
261    rn = makeSP(rn);
262
263    if (!post) {
264        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
265                post ? 0 : imm);
266    }
267
268    if (fp) {
269        if (size == 16) {
270            if (load) {
271                *uop++ = new MicroLdFp16Uop(machInst, rt,
272                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
273                *uop++ = new MicroLdFp16Uop(machInst, rt2,
274                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
275            } else {
276                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
277                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
278                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
279                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
280                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
281                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
282                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
283                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
284            }
285        } else if (size == 8) {
286            if (load) {
287                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
288                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
289            } else {
290                *uop++ = new MicroStrFpXImmUop(machInst, rt,
291                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
292                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
293                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
294            }
295        } else if (size == 4) {
296            if (load) {
297                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
298                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
299            } else {
300                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
301                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
302            }
303        }
304    } else {
305        if (size == 8) {
306            if (load) {
307                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
308                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
309            } else {
310                *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
311                        0, noAlloc, exclusive, acrel);
312                *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
313                        size, noAlloc, exclusive, acrel);
314            }
315        } else if (size == 4) {
316            if (load) {
317                if (signExt) {
318                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
319                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
320                } else {
321                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
322                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
323                }
324            } else {
325                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
326                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
327            }
328        }
329    }
330
331    if (writeback) {
332        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
333                                   post ? imm : 0);
334    }
335
336    assert(uop == &microOps[numMicroops]);
337    (*--uop)->setLastMicroop();
338
339    for (StaticInstPtr *curUop = microOps;
340            !(*curUop)->isLastMicroop(); curUop++) {
341        (*curUop)->setDelayedCommit();
342    }
343}
344
345BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
346                             OpClass __opClass, bool load, IntRegIndex dest,
347                             IntRegIndex base, int64_t imm) :
348    PredMacroOp(mnem, machInst, __opClass)
349{
350    numMicroops = load ? 1 : 2;
351    microOps = new StaticInstPtr[numMicroops];
352
353    StaticInstPtr *uop = microOps;
354
355    if (load) {
356        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
357    } else {
358        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
359        (*uop)->setDelayedCommit();
360        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
361    }
362    (*uop)->setLastMicroop();
363}
364
365BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
366                               OpClass __opClass, bool load, IntRegIndex dest,
367                               IntRegIndex base, int64_t imm) :
368    PredMacroOp(mnem, machInst, __opClass)
369{
370    numMicroops = load ? 2 : 3;
371    microOps = new StaticInstPtr[numMicroops];
372
373    StaticInstPtr *uop = microOps;
374
375    if (load) {
376        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
377    } else {
378        *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
379        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
380    }
381    *uop = new MicroAddXiUop(machInst, base, base, imm);
382    (*uop)->setLastMicroop();
383
384    for (StaticInstPtr *curUop = microOps;
385            !(*curUop)->isLastMicroop(); curUop++) {
386        (*curUop)->setDelayedCommit();
387    }
388}
389
390BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
391                             OpClass __opClass, bool load, IntRegIndex dest,
392                             IntRegIndex base, int64_t imm) :
393    PredMacroOp(mnem, machInst, __opClass)
394{
395    numMicroops = load ? 2 : 3;
396    microOps = new StaticInstPtr[numMicroops];
397
398    StaticInstPtr *uop = microOps;
399
400    if (load) {
401        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
402    } else {
403        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
404        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
405    }
406    *uop = new MicroAddXiUop(machInst, base, base, imm);
407    (*uop)->setLastMicroop();
408
409    for (StaticInstPtr *curUop = microOps;
410            !(*curUop)->isLastMicroop(); curUop++) {
411        (*curUop)->setDelayedCommit();
412    }
413}
414
415BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
416                             OpClass __opClass, bool load, IntRegIndex dest,
417                             IntRegIndex base, IntRegIndex offset,
418                             ArmExtendType type, int64_t imm) :
419    PredMacroOp(mnem, machInst, __opClass)
420{
421    numMicroops = load ? 1 : 2;
422    microOps = new StaticInstPtr[numMicroops];
423
424    StaticInstPtr *uop = microOps;
425
426    if (load) {
427        *uop = new MicroLdFp16RegUop(machInst, dest, base,
428                                  offset, type, imm);
429    } else {
430        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
431                                       offset, type, imm);
432        (*uop)->setDelayedCommit();
433        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
434                                         offset, type, imm);
435    }
436
437    (*uop)->setLastMicroop();
438}
439
440BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
441                             OpClass __opClass, IntRegIndex dest,
442                             int64_t imm) :
443    PredMacroOp(mnem, machInst, __opClass)
444{
445    numMicroops = 1;
446    microOps = new StaticInstPtr[numMicroops];
447
448    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
449    microOps[0]->setLastMicroop();
450}
451
452VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
453                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
454                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
455    PredMacroOp(mnem, machInst, __opClass)
456{
457    assert(regs > 0 && regs <= 4);
458    assert(regs % elems == 0);
459
460    numMicroops = (regs > 2) ? 2 : 1;
461    bool wb = (rm != 15);
462    bool deinterleave = (elems > 1);
463
464    if (wb) numMicroops++;
465    if (deinterleave) numMicroops += (regs / elems);
466    microOps = new StaticInstPtr[numMicroops];
467
468    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
469
470    uint32_t noAlign = TLB::MustBeOne;
471
472    unsigned uopIdx = 0;
473    switch (regs) {
474      case 4:
475        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
476                size, machInst, rMid, rn, 0, align);
477        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
478                size, machInst, rMid + 4, rn, 16, noAlign);
479        break;
480      case 3:
481        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
482                size, machInst, rMid, rn, 0, align);
483        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
484                size, machInst, rMid + 4, rn, 16, noAlign);
485        break;
486      case 2:
487        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
488                size, machInst, rMid, rn, 0, align);
489        break;
490      case 1:
491        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
492                size, machInst, rMid, rn, 0, align);
493        break;
494      default:
495        // Unknown number of registers
496        microOps[uopIdx++] = new Unknown(machInst);
497    }
498    if (wb) {
499        if (rm != 15 && rm != 13) {
500            microOps[uopIdx++] =
501                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
502        } else {
503            microOps[uopIdx++] =
504                new MicroAddiUop(machInst, rn, rn, regs * 8);
505        }
506    }
507    if (deinterleave) {
508        switch (elems) {
509          case 4:
510            assert(regs == 4);
511            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
512                    size, machInst, vd * 2, rMid, inc * 2);
513            break;
514          case 3:
515            assert(regs == 3);
516            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
517                    size, machInst, vd * 2, rMid, inc * 2);
518            break;
519          case 2:
520            assert(regs == 4 || regs == 2);
521            if (regs == 4) {
522                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
523                        size, machInst, vd * 2, rMid, inc * 2);
524                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
525                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
526            } else {
527                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
528                        size, machInst, vd * 2, rMid, inc * 2);
529            }
530            break;
531          default:
532            // Bad number of elements to deinterleave
533            microOps[uopIdx++] = new Unknown(machInst);
534        }
535    }
536    assert(uopIdx == numMicroops);
537
538    for (unsigned i = 0; i < numMicroops - 1; i++) {
539        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
540        assert(uopPtr);
541        uopPtr->setDelayedCommit();
542    }
543    microOps[numMicroops - 1]->setLastMicroop();
544}
545
546VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
547                         OpClass __opClass, bool all, unsigned elems,
548                         RegIndex rn, RegIndex vd, unsigned regs,
549                         unsigned inc, uint32_t size, uint32_t align,
550                         RegIndex rm, unsigned lane) :
551    PredMacroOp(mnem, machInst, __opClass)
552{
553    assert(regs > 0 && regs <= 4);
554    assert(regs % elems == 0);
555
556    unsigned eBytes = (1 << size);
557    unsigned loadSize = eBytes * elems;
558    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
559                        sizeof(FloatRegBits);
560
561    assert(loadRegs > 0 && loadRegs <= 4);
562
563    numMicroops = 1;
564    bool wb = (rm != 15);
565
566    if (wb) numMicroops++;
567    numMicroops += (regs / elems);
568    microOps = new StaticInstPtr[numMicroops];
569
570    RegIndex ufp0 = NumFloatV7ArchRegs;
571
572    unsigned uopIdx = 0;
573    switch (loadSize) {
574      case 1:
575        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
576                machInst, ufp0, rn, 0, align);
577        break;
578      case 2:
579        if (eBytes == 2) {
580            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
581                    machInst, ufp0, rn, 0, align);
582        } else {
583            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
584                    machInst, ufp0, rn, 0, align);
585        }
586        break;
587      case 3:
588        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
589                machInst, ufp0, rn, 0, align);
590        break;
591      case 4:
592        switch (eBytes) {
593          case 1:
594            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
595                    machInst, ufp0, rn, 0, align);
596            break;
597          case 2:
598            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
599                    machInst, ufp0, rn, 0, align);
600            break;
601          case 4:
602            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
603                    machInst, ufp0, rn, 0, align);
604            break;
605        }
606        break;
607      case 6:
608        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
609                machInst, ufp0, rn, 0, align);
610        break;
611      case 8:
612        switch (eBytes) {
613          case 2:
614            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
615                    machInst, ufp0, rn, 0, align);
616            break;
617          case 4:
618            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
619                    machInst, ufp0, rn, 0, align);
620            break;
621        }
622        break;
623      case 12:
624        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
625                machInst, ufp0, rn, 0, align);
626        break;
627      case 16:
628        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
629                machInst, ufp0, rn, 0, align);
630        break;
631      default:
632        // Unrecognized load size
633        microOps[uopIdx++] = new Unknown(machInst);
634    }
635    if (wb) {
636        if (rm != 15 && rm != 13) {
637            microOps[uopIdx++] =
638                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
639        } else {
640            microOps[uopIdx++] =
641                new MicroAddiUop(machInst, rn, rn, loadSize);
642        }
643    }
644    switch (elems) {
645      case 4:
646        assert(regs == 4);
647        switch (size) {
648          case 0:
649            if (all) {
650                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
651                        machInst, vd * 2, ufp0, inc * 2);
652            } else {
653                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
654                        machInst, vd * 2, ufp0, inc * 2, lane);
655            }
656            break;
657          case 1:
658            if (all) {
659                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
660                        machInst, vd * 2, ufp0, inc * 2);
661            } else {
662                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
663                        machInst, vd * 2, ufp0, inc * 2, lane);
664            }
665            break;
666          case 2:
667            if (all) {
668                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
669                        machInst, vd * 2, ufp0, inc * 2);
670            } else {
671                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
672                        machInst, vd * 2, ufp0, inc * 2, lane);
673            }
674            break;
675          default:
676            // Bad size
677            microOps[uopIdx++] = new Unknown(machInst);
678            break;
679        }
680        break;
681      case 3:
682        assert(regs == 3);
683        switch (size) {
684          case 0:
685            if (all) {
686                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
687                        machInst, vd * 2, ufp0, inc * 2);
688            } else {
689                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
690                        machInst, vd * 2, ufp0, inc * 2, lane);
691            }
692            break;
693          case 1:
694            if (all) {
695                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
696                        machInst, vd * 2, ufp0, inc * 2);
697            } else {
698                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
699                        machInst, vd * 2, ufp0, inc * 2, lane);
700            }
701            break;
702          case 2:
703            if (all) {
704                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
705                        machInst, vd * 2, ufp0, inc * 2);
706            } else {
707                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
708                        machInst, vd * 2, ufp0, inc * 2, lane);
709            }
710            break;
711          default:
712            // Bad size
713            microOps[uopIdx++] = new Unknown(machInst);
714            break;
715        }
716        break;
717      case 2:
718        assert(regs == 2);
719        assert(loadRegs <= 2);
720        switch (size) {
721          case 0:
722            if (all) {
723                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
724                        machInst, vd * 2, ufp0, inc * 2);
725            } else {
726                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
727                        machInst, vd * 2, ufp0, inc * 2, lane);
728            }
729            break;
730          case 1:
731            if (all) {
732                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
733                        machInst, vd * 2, ufp0, inc * 2);
734            } else {
735                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
736                        machInst, vd * 2, ufp0, inc * 2, lane);
737            }
738            break;
739          case 2:
740            if (all) {
741                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
742                        machInst, vd * 2, ufp0, inc * 2);
743            } else {
744                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
745                        machInst, vd * 2, ufp0, inc * 2, lane);
746            }
747            break;
748          default:
749            // Bad size
750            microOps[uopIdx++] = new Unknown(machInst);
751            break;
752        }
753        break;
754      case 1:
755        assert(regs == 1 || (all && regs == 2));
756        assert(loadRegs <= 2);
757        for (unsigned offset = 0; offset < regs; offset++) {
758            switch (size) {
759              case 0:
760                if (all) {
761                    microOps[uopIdx++] =
762                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
763                            machInst, (vd + offset) * 2, ufp0, inc * 2);
764                } else {
765                    microOps[uopIdx++] =
766                        new MicroUnpackNeon2to2Uop<uint8_t>(
767                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
768                }
769                break;
770              case 1:
771                if (all) {
772                    microOps[uopIdx++] =
773                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
774                            machInst, (vd + offset) * 2, ufp0, inc * 2);
775                } else {
776                    microOps[uopIdx++] =
777                        new MicroUnpackNeon2to2Uop<uint16_t>(
778                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
779                }
780                break;
781              case 2:
782                if (all) {
783                    microOps[uopIdx++] =
784                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
785                            machInst, (vd + offset) * 2, ufp0, inc * 2);
786                } else {
787                    microOps[uopIdx++] =
788                        new MicroUnpackNeon2to2Uop<uint32_t>(
789                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
790                }
791                break;
792              default:
793                // Bad size
794                microOps[uopIdx++] = new Unknown(machInst);
795                break;
796            }
797        }
798        break;
799      default:
800        // Bad number of elements to unpack
801        microOps[uopIdx++] = new Unknown(machInst);
802    }
803    assert(uopIdx == numMicroops);
804
805    for (unsigned i = 0; i < numMicroops - 1; i++) {
806        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
807        assert(uopPtr);
808        uopPtr->setDelayedCommit();
809    }
810    microOps[numMicroops - 1]->setLastMicroop();
811}
812
813VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
814                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
815                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
816    PredMacroOp(mnem, machInst, __opClass)
817{
818    assert(regs > 0 && regs <= 4);
819    assert(regs % elems == 0);
820
821    numMicroops = (regs > 2) ? 2 : 1;
822    bool wb = (rm != 15);
823    bool interleave = (elems > 1);
824
825    if (wb) numMicroops++;
826    if (interleave) numMicroops += (regs / elems);
827    microOps = new StaticInstPtr[numMicroops];
828
829    uint32_t noAlign = TLB::MustBeOne;
830
831    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
832
833    unsigned uopIdx = 0;
834    if (interleave) {
835        switch (elems) {
836          case 4:
837            assert(regs == 4);
838            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
839                    size, machInst, rMid, vd * 2, inc * 2);
840            break;
841          case 3:
842            assert(regs == 3);
843            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
844                    size, machInst, rMid, vd * 2, inc * 2);
845            break;
846          case 2:
847            assert(regs == 4 || regs == 2);
848            if (regs == 4) {
849                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
850                        size, machInst, rMid, vd * 2, inc * 2);
851                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
852                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
853            } else {
854                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
855                        size, machInst, rMid, vd * 2, inc * 2);
856            }
857            break;
858          default:
859            // Bad number of elements to interleave
860            microOps[uopIdx++] = new Unknown(machInst);
861        }
862    }
863    switch (regs) {
864      case 4:
865        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
866                size, machInst, rMid, rn, 0, align);
867        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
868                size, machInst, rMid + 4, rn, 16, noAlign);
869        break;
870      case 3:
871        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
872                size, machInst, rMid, rn, 0, align);
873        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
874                size, machInst, rMid + 4, rn, 16, noAlign);
875        break;
876      case 2:
877        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
878                size, machInst, rMid, rn, 0, align);
879        break;
880      case 1:
881        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
882                size, machInst, rMid, rn, 0, align);
883        break;
884      default:
885        // Unknown number of registers
886        microOps[uopIdx++] = new Unknown(machInst);
887    }
888    if (wb) {
889        if (rm != 15 && rm != 13) {
890            microOps[uopIdx++] =
891                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
892        } else {
893            microOps[uopIdx++] =
894                new MicroAddiUop(machInst, rn, rn, regs * 8);
895        }
896    }
897    assert(uopIdx == numMicroops);
898
899    for (unsigned i = 0; i < numMicroops - 1; i++) {
900        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
901        assert(uopPtr);
902        uopPtr->setDelayedCommit();
903    }
904    microOps[numMicroops - 1]->setLastMicroop();
905}
906
907VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
908                         OpClass __opClass, bool all, unsigned elems,
909                         RegIndex rn, RegIndex vd, unsigned regs,
910                         unsigned inc, uint32_t size, uint32_t align,
911                         RegIndex rm, unsigned lane) :
912    PredMacroOp(mnem, machInst, __opClass)
913{
914    assert(!all);
915    assert(regs > 0 && regs <= 4);
916    assert(regs % elems == 0);
917
918    unsigned eBytes = (1 << size);
919    unsigned storeSize = eBytes * elems;
920    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
921                         sizeof(FloatRegBits);
922
923    assert(storeRegs > 0 && storeRegs <= 4);
924
925    numMicroops = 1;
926    bool wb = (rm != 15);
927
928    if (wb) numMicroops++;
929    numMicroops += (regs / elems);
930    microOps = new StaticInstPtr[numMicroops];
931
932    RegIndex ufp0 = NumFloatV7ArchRegs;
933
934    unsigned uopIdx = 0;
935    switch (elems) {
936      case 4:
937        assert(regs == 4);
938        switch (size) {
939          case 0:
940            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
941                    machInst, ufp0, vd * 2, inc * 2, lane);
942            break;
943          case 1:
944            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
945                    machInst, ufp0, vd * 2, inc * 2, lane);
946            break;
947          case 2:
948            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
949                    machInst, ufp0, vd * 2, inc * 2, lane);
950            break;
951          default:
952            // Bad size
953            microOps[uopIdx++] = new Unknown(machInst);
954            break;
955        }
956        break;
957      case 3:
958        assert(regs == 3);
959        switch (size) {
960          case 0:
961            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
962                    machInst, ufp0, vd * 2, inc * 2, lane);
963            break;
964          case 1:
965            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
966                    machInst, ufp0, vd * 2, inc * 2, lane);
967            break;
968          case 2:
969            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
970                    machInst, ufp0, vd * 2, inc * 2, lane);
971            break;
972          default:
973            // Bad size
974            microOps[uopIdx++] = new Unknown(machInst);
975            break;
976        }
977        break;
978      case 2:
979        assert(regs == 2);
980        assert(storeRegs <= 2);
981        switch (size) {
982          case 0:
983            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
984                    machInst, ufp0, vd * 2, inc * 2, lane);
985            break;
986          case 1:
987            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
988                    machInst, ufp0, vd * 2, inc * 2, lane);
989            break;
990          case 2:
991            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
992                    machInst, ufp0, vd * 2, inc * 2, lane);
993            break;
994          default:
995            // Bad size
996            microOps[uopIdx++] = new Unknown(machInst);
997            break;
998        }
999        break;
1000      case 1:
1001        assert(regs == 1 || (all && regs == 2));
1002        assert(storeRegs <= 2);
1003        for (unsigned offset = 0; offset < regs; offset++) {
1004            switch (size) {
1005              case 0:
1006                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1007                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1008                break;
1009              case 1:
1010                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1011                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1012                break;
1013              case 2:
1014                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1015                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1016                break;
1017              default:
1018                // Bad size
1019                microOps[uopIdx++] = new Unknown(machInst);
1020                break;
1021            }
1022        }
1023        break;
1024      default:
1025        // Bad number of elements to unpack
1026        microOps[uopIdx++] = new Unknown(machInst);
1027    }
1028    switch (storeSize) {
1029      case 1:
1030        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1031                machInst, ufp0, rn, 0, align);
1032        break;
1033      case 2:
1034        if (eBytes == 2) {
1035            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1036                    machInst, ufp0, rn, 0, align);
1037        } else {
1038            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1039                    machInst, ufp0, rn, 0, align);
1040        }
1041        break;
1042      case 3:
1043        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1044                machInst, ufp0, rn, 0, align);
1045        break;
1046      case 4:
1047        switch (eBytes) {
1048          case 1:
1049            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1050                    machInst, ufp0, rn, 0, align);
1051            break;
1052          case 2:
1053            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1054                    machInst, ufp0, rn, 0, align);
1055            break;
1056          case 4:
1057            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1058                    machInst, ufp0, rn, 0, align);
1059            break;
1060        }
1061        break;
1062      case 6:
1063        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1064                machInst, ufp0, rn, 0, align);
1065        break;
1066      case 8:
1067        switch (eBytes) {
1068          case 2:
1069            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1070                    machInst, ufp0, rn, 0, align);
1071            break;
1072          case 4:
1073            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1074                    machInst, ufp0, rn, 0, align);
1075            break;
1076        }
1077        break;
1078      case 12:
1079        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1080                machInst, ufp0, rn, 0, align);
1081        break;
1082      case 16:
1083        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1084                machInst, ufp0, rn, 0, align);
1085        break;
1086      default:
1087        // Bad store size
1088        microOps[uopIdx++] = new Unknown(machInst);
1089    }
1090    if (wb) {
1091        if (rm != 15 && rm != 13) {
1092            microOps[uopIdx++] =
1093                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1094        } else {
1095            microOps[uopIdx++] =
1096                new MicroAddiUop(machInst, rn, rn, storeSize);
1097        }
1098    }
1099    assert(uopIdx == numMicroops);
1100
1101    for (unsigned i = 0; i < numMicroops - 1; i++) {
1102        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1103        assert(uopPtr);
1104        uopPtr->setDelayedCommit();
1105    }
1106    microOps[numMicroops - 1]->setLastMicroop();
1107}
1108
1109VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1110                         OpClass __opClass, RegIndex rn, RegIndex vd,
1111                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1112                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1113    PredMacroOp(mnem, machInst, __opClass)
1114{
1115    RegIndex vx = NumFloatV8ArchRegs / 4;
1116    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1117    bool baseIsSP = isSP((IntRegIndex) rnsp);
1118
1119    numMicroops = wb ? 1 : 0;
1120
1121    int totNumBytes = numRegs * dataSize / 8;
1122    assert(totNumBytes <= 64);
1123
1124    // The guiding principle here is that no more than 16 bytes can be
1125    // transferred at a time
1126    int numMemMicroops = totNumBytes / 16;
1127    int residuum = totNumBytes % 16;
1128    if (residuum)
1129        ++numMemMicroops;
1130    numMicroops += numMemMicroops;
1131
1132    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1133    numMicroops += numMarshalMicroops;
1134
1135    microOps = new StaticInstPtr[numMicroops];
1136    unsigned uopIdx = 0;
1137    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1138        TLB::AllowUnaligned;
1139
1140    int i = 0;
1141    for(; i < numMemMicroops - 1; ++i) {
1142        microOps[uopIdx++] = new MicroNeonLoad64(
1143            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1144            baseIsSP, 16 /* accSize */, eSize);
1145    }
1146    microOps[uopIdx++] =  new MicroNeonLoad64(
1147        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1148        residuum ? residuum : 16 /* accSize */, eSize);
1149
1150    // Writeback microop: the post-increment amount is encoded in "Rm": a
1151    // 64-bit general register OR as '11111' for an immediate value equal to
1152    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1153    if (wb) {
1154        if (rm != ((RegIndex) INTREG_X31)) {
1155            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1156                                                      UXTX, 0);
1157        } else {
1158            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1159                                                   totNumBytes);
1160        }
1161    }
1162
1163    for (int i = 0; i < numMarshalMicroops; ++i) {
1164        switch(numRegs) {
1165            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1166                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1167                        numStructElems, 1, i /* step */);
1168                    break;
1169            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1170                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1171                        numStructElems, 2, i /* step */);
1172                    break;
1173            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1174                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1175                        numStructElems, 3, i /* step */);
1176                    break;
1177            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1178                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1179                        numStructElems, 4, i /* step */);
1180                    break;
1181            default: panic("Invalid number of registers");
1182        }
1183
1184    }
1185
1186    assert(uopIdx == numMicroops);
1187
1188    for (int i = 0; i < numMicroops - 1; ++i) {
1189        microOps[i]->setDelayedCommit();
1190    }
1191    microOps[numMicroops - 1]->setLastMicroop();
1192}
1193
1194VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1195                         OpClass __opClass, RegIndex rn, RegIndex vd,
1196                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1197                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1198    PredMacroOp(mnem, machInst, __opClass)
1199{
1200    RegIndex vx = NumFloatV8ArchRegs / 4;
1201    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1202    bool baseIsSP = isSP((IntRegIndex) rnsp);
1203
1204    numMicroops = wb ? 1 : 0;
1205
1206    int totNumBytes = numRegs * dataSize / 8;
1207    assert(totNumBytes <= 64);
1208
1209    // The guiding principle here is that no more than 16 bytes can be
1210    // transferred at a time
1211    int numMemMicroops = totNumBytes / 16;
1212    int residuum = totNumBytes % 16;
1213    if (residuum)
1214        ++numMemMicroops;
1215    numMicroops += numMemMicroops;
1216
1217    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1218    numMicroops += numMarshalMicroops;
1219
1220    microOps = new StaticInstPtr[numMicroops];
1221    unsigned uopIdx = 0;
1222
1223    for(int i = 0; i < numMarshalMicroops; ++i) {
1224        switch (numRegs) {
1225            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1226                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1227                        numStructElems, 1, i /* step */);
1228                    break;
1229            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1230                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1231                        numStructElems, 2, i /* step */);
1232                    break;
1233            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1234                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1235                        numStructElems, 3, i /* step */);
1236                    break;
1237            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1238                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1239                        numStructElems, 4, i /* step */);
1240                    break;
1241            default: panic("Invalid number of registers");
1242        }
1243    }
1244
1245    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1246        TLB::AllowUnaligned;
1247
1248    int i = 0;
1249    for(; i < numMemMicroops - 1; ++i) {
1250        microOps[uopIdx++] = new MicroNeonStore64(
1251            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1252            baseIsSP, 16 /* accSize */, eSize);
1253    }
1254    microOps[uopIdx++] = new MicroNeonStore64(
1255        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1256        residuum ? residuum : 16 /* accSize */, eSize);
1257
1258    // Writeback microop: the post-increment amount is encoded in "Rm": a
1259    // 64-bit general register OR as '11111' for an immediate value equal to
1260    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1261    if (wb) {
1262        if (rm != ((RegIndex) INTREG_X31)) {
1263            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1264                                                      UXTX, 0);
1265        } else {
1266            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1267                                                   totNumBytes);
1268        }
1269    }
1270
1271    assert(uopIdx == numMicroops);
1272
1273    for (int i = 0; i < numMicroops - 1; i++) {
1274        microOps[i]->setDelayedCommit();
1275    }
1276    microOps[numMicroops - 1]->setLastMicroop();
1277}
1278
1279VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1280                             OpClass __opClass, RegIndex rn, RegIndex vd,
1281                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1282                             uint8_t numStructElems, uint8_t index, bool wb,
1283                             bool replicate) :
1284    PredMacroOp(mnem, machInst, __opClass)
1285{
1286    RegIndex vx = NumFloatV8ArchRegs / 4;
1287    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1288    bool baseIsSP = isSP((IntRegIndex) rnsp);
1289
1290    numMicroops = wb ? 1 : 0;
1291
1292    int eSizeBytes = 1 << eSize;
1293    int totNumBytes = numStructElems * eSizeBytes;
1294    assert(totNumBytes <= 64);
1295
1296    // The guiding principle here is that no more than 16 bytes can be
1297    // transferred at a time
1298    int numMemMicroops = totNumBytes / 16;
1299    int residuum = totNumBytes % 16;
1300    if (residuum)
1301        ++numMemMicroops;
1302    numMicroops += numMemMicroops;
1303
1304    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1305    numMicroops += numMarshalMicroops;
1306
1307    microOps = new StaticInstPtr[numMicroops];
1308    unsigned uopIdx = 0;
1309
1310    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1311        TLB::AllowUnaligned;
1312
1313    int i = 0;
1314    for (; i < numMemMicroops - 1; ++i) {
1315        microOps[uopIdx++] = new MicroNeonLoad64(
1316            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1317            baseIsSP, 16 /* accSize */, eSize);
1318    }
1319    microOps[uopIdx++] = new MicroNeonLoad64(
1320        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1321        residuum ? residuum : 16 /* accSize */, eSize);
1322
1323    // Writeback microop: the post-increment amount is encoded in "Rm": a
1324    // 64-bit general register OR as '11111' for an immediate value equal to
1325    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1326    if (wb) {
1327        if (rm != ((RegIndex) INTREG_X31)) {
1328            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1329                                                      UXTX, 0);
1330        } else {
1331            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1332                                                   totNumBytes);
1333        }
1334    }
1335
1336    for(int i = 0; i < numMarshalMicroops; ++i) {
1337        microOps[uopIdx++] = new MicroUnpackNeon64(
1338            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1339            numStructElems, index, i /* step */, replicate);
1340    }
1341
1342    assert(uopIdx == numMicroops);
1343
1344    for (int i = 0; i < numMicroops - 1; i++) {
1345        microOps[i]->setDelayedCommit();
1346    }
1347    microOps[numMicroops - 1]->setLastMicroop();
1348}
1349
1350VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1351                             OpClass __opClass, RegIndex rn, RegIndex vd,
1352                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1353                             uint8_t numStructElems, uint8_t index, bool wb,
1354                             bool replicate) :
1355    PredMacroOp(mnem, machInst, __opClass)
1356{
1357    RegIndex vx = NumFloatV8ArchRegs / 4;
1358    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1359    bool baseIsSP = isSP((IntRegIndex) rnsp);
1360
1361    numMicroops = wb ? 1 : 0;
1362
1363    int eSizeBytes = 1 << eSize;
1364    int totNumBytes = numStructElems * eSizeBytes;
1365    assert(totNumBytes <= 64);
1366
1367    // The guiding principle here is that no more than 16 bytes can be
1368    // transferred at a time
1369    int numMemMicroops = totNumBytes / 16;
1370    int residuum = totNumBytes % 16;
1371    if (residuum)
1372        ++numMemMicroops;
1373    numMicroops += numMemMicroops;
1374
1375    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1376    numMicroops += numMarshalMicroops;
1377
1378    microOps = new StaticInstPtr[numMicroops];
1379    unsigned uopIdx = 0;
1380
1381    for(int i = 0; i < numMarshalMicroops; ++i) {
1382        microOps[uopIdx++] = new MicroPackNeon64(
1383            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1384            numStructElems, index, i /* step */, replicate);
1385    }
1386
1387    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1388        TLB::AllowUnaligned;
1389
1390    int i = 0;
1391    for(; i < numMemMicroops - 1; ++i) {
1392        microOps[uopIdx++] = new MicroNeonStore64(
1393            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1394            baseIsSP, 16 /* accsize */, eSize);
1395    }
1396    microOps[uopIdx++] = new MicroNeonStore64(
1397        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1398        residuum ? residuum : 16 /* accSize */, eSize);
1399
1400    // Writeback microop: the post-increment amount is encoded in "Rm": a
1401    // 64-bit general register OR as '11111' for an immediate value equal to
1402    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1403    if (wb) {
1404        if (rm != ((RegIndex) INTREG_X31)) {
1405            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1406                                                      UXTX, 0);
1407        } else {
1408            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1409                                                   totNumBytes);
1410        }
1411    }
1412
1413    assert(uopIdx == numMicroops);
1414
1415    for (int i = 0; i < numMicroops - 1; i++) {
1416        microOps[i]->setDelayedCommit();
1417    }
1418    microOps[numMicroops - 1]->setLastMicroop();
1419}
1420
1421MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1422                             OpClass __opClass, IntRegIndex rn,
1423                             RegIndex vd, bool single, bool up,
1424                             bool writeback, bool load, uint32_t offset) :
1425    PredMacroOp(mnem, machInst, __opClass)
1426{
1427    int i = 0;
1428
1429    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1430    // to be functionally identical except that fldmx is deprecated. For now
1431    // we'll assume they're otherwise interchangable.
1432    int count = (single ? offset : (offset / 2));
1433    if (count == 0 || count > NumFloatV7ArchRegs)
1434        warn_once("Bad offset field for VFP load/store multiple.\n");
1435    if (count == 0) {
1436        // Force there to be at least one microop so the macroop makes sense.
1437        writeback = true;
1438    }
1439    if (count > NumFloatV7ArchRegs)
1440        count = NumFloatV7ArchRegs;
1441
1442    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1443    microOps = new StaticInstPtr[numMicroops];
1444
1445    int64_t addr = 0;
1446
1447    if (!up)
1448        addr = 4 * offset;
1449
1450    bool tempUp = up;
1451    for (int j = 0; j < count; j++) {
1452        if (load) {
1453            if (single) {
1454                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1455                                                  tempUp, addr);
1456            } else {
1457                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1458                                                    tempUp, addr);
1459                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1460                                                    addr + (up ? 4 : -4));
1461            }
1462        } else {
1463            if (single) {
1464                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1465                                                  tempUp, addr);
1466            } else {
1467                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1468                                                    tempUp, addr);
1469                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1470                                                    addr + (up ? 4 : -4));
1471            }
1472        }
1473        if (!tempUp) {
1474            addr -= (single ? 4 : 8);
1475            // The microops don't handle negative displacement, so turn if we
1476            // hit zero, flip polarity and start adding.
1477            if (addr <= 0) {
1478                tempUp = true;
1479                addr = -addr;
1480            }
1481        } else {
1482            addr += (single ? 4 : 8);
1483        }
1484    }
1485
1486    if (writeback) {
1487        if (up) {
1488            microOps[i++] =
1489                new MicroAddiUop(machInst, rn, rn, 4 * offset);
1490        } else {
1491            microOps[i++] =
1492                new MicroSubiUop(machInst, rn, rn, 4 * offset);
1493        }
1494    }
1495
1496    assert(numMicroops == i);
1497    microOps[numMicroops - 1]->setLastMicroop();
1498
1499    for (StaticInstPtr *curUop = microOps;
1500            !(*curUop)->isLastMicroop(); curUop++) {
1501        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1502        assert(uopPtr);
1503        uopPtr->setDelayedCommit();
1504    }
1505}
1506
1507std::string
1508MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1509{
1510    std::stringstream ss;
1511    printMnemonic(ss);
1512    printReg(ss, ura);
1513    ss << ", ";
1514    printReg(ss, urb);
1515    ss << ", ";
1516    ccprintf(ss, "#%d", imm);
1517    return ss.str();
1518}
1519
1520std::string
1521MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1522{
1523    std::stringstream ss;
1524    printMnemonic(ss);
1525    printReg(ss, ura);
1526    ss << ", ";
1527    printReg(ss, urb);
1528    ss << ", ";
1529    ccprintf(ss, "#%d", imm);
1530    return ss.str();
1531}
1532
1533std::string
1534MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1535{
1536    std::stringstream ss;
1537    printMnemonic(ss);
1538    ss << "[PC,CPSR]";
1539    return ss.str();
1540}
1541
1542std::string
1543MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1544{
1545    std::stringstream ss;
1546    printMnemonic(ss);
1547    printReg(ss, ura);
1548    ccprintf(ss, ", ");
1549    printReg(ss, urb);
1550    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1551    return ss.str();
1552}
1553
1554std::string
1555MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1556{
1557    std::stringstream ss;
1558    printMnemonic(ss);
1559    printReg(ss, ura);
1560    ss << ", ";
1561    printReg(ss, urb);
1562    return ss.str();
1563}
1564
1565std::string
1566MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1567{
1568    std::stringstream ss;
1569    printMnemonic(ss);
1570    printReg(ss, ura);
1571    ss << ", ";
1572    printReg(ss, urb);
1573    ss << ", ";
1574    printReg(ss, urc);
1575    return ss.str();
1576}
1577
1578std::string
1579MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1580{
1581    std::stringstream ss;
1582    printMnemonic(ss);
1583    if (isFloating())
1584        printReg(ss, ura + FP_Reg_Base);
1585    else
1586        printReg(ss, ura);
1587    ss << ", [";
1588    printReg(ss, urb);
1589    ss << ", ";
1590    ccprintf(ss, "#%d", imm);
1591    ss << "]";
1592    return ss.str();
1593}
1594
1595std::string
1596MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1597{
1598    std::stringstream ss;
1599    printMnemonic(ss);
1600    printReg(ss, dest);
1601    ss << ",";
1602    printReg(ss, dest2);
1603    ss << ", [";
1604    printReg(ss, urb);
1605    ss << ", ";
1606    ccprintf(ss, "#%d", imm);
1607    ss << "]";
1608    return ss.str();
1609}
1610
1611}
1612