1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include "arch/arm/insts/macromem.hh"
44
45#include <sstream>
46
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57                       OpClass __opClass, IntRegIndex rn,
58                       bool index, bool up, bool user, bool writeback,
59                       bool load, uint32_t reglist) :
60    PredMacroOp(mnem, machInst, __opClass)
61{
62    uint32_t regs = reglist;
63    uint32_t ones = number_of_ones(reglist);
64    uint32_t mem_ops = ones;
65
66    // Copy the base address register if we overwrite it, or if this instruction
67    // is basically a no-op (we have to do something)
68    bool copy_base =  (bits(reglist, rn) && load) || !ones;
69    bool force_user = user & !bits(reglist, 15);
70    bool exception_ret = user & bits(reglist, 15);
71    bool pc_temp = load && writeback && bits(reglist, 15);
72
73    if (!ones) {
74        numMicroops = 1;
75    } else if (load) {
76        numMicroops = ((ones + 1) / 2)
77                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
78                    + (copy_base ? 1 : 0)
79                    + (writeback? 1 : 0)
80                    + (pc_temp ? 1 : 0);
81    } else {
82        numMicroops = ones + (writeback ? 1 : 0);
83    }
84
85    microOps = new StaticInstPtr[numMicroops];
86
87    uint32_t addr = 0;
88
89    if (!up)
90        addr = (ones << 2) - 4;
91
92    if (!index)
93        addr += 4;
94
95    StaticInstPtr *uop = microOps;
96
97    // Add 0 to Rn and stick it in ureg0.
98    // This is equivalent to a move.
99    if (copy_base)
100        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
101
102    unsigned reg = 0;
103    while (mem_ops != 0) {
104        // Do load operations in pairs if possible
105        if (load && mem_ops >= 2 &&
106            !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
107            // 64-bit memory operation
108            // Find 2 set register bits (clear them after finding)
109            unsigned reg_idx1;
110            unsigned reg_idx2;
111
112            // Find the first register
113            while (!bits(regs, reg)) reg++;
114            replaceBits(regs, reg, 0);
115            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
116
117            // Find the second register
118            while (!bits(regs, reg)) reg++;
119            replaceBits(regs, reg, 0);
120            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
121
122            // Load into temp reg if necessary
123            if (reg_idx2 == INTREG_PC && pc_temp)
124                reg_idx2 = INTREG_UREG1;
125
126            // Actually load both registers from memory
127            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
128                    copy_base ? INTREG_UREG0 : rn, up, addr);
129
130            if (!writeback && reg_idx2 == INTREG_PC) {
131                // No writeback if idx==pc, set appropriate flags
132                (*uop)->setFlag(StaticInst::IsControl);
133                (*uop)->setFlag(StaticInst::IsIndirectControl);
134
135                if (!(condCode == COND_AL || condCode == COND_UC))
136                    (*uop)->setFlag(StaticInst::IsCondControl);
137                else
138                    (*uop)->setFlag(StaticInst::IsUncondControl);
139            }
140
141            if (up) addr += 8;
142            else addr -= 8;
143            mem_ops -= 2;
144        } else {
145            // 32-bit memory operation
146            // Find register for operation
147            unsigned reg_idx;
148            while (!bits(regs, reg)) reg++;
149            replaceBits(regs, reg, 0);
150            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
151
152            if (load) {
153                if (writeback && reg_idx == INTREG_PC) {
154                    // If this instruction changes the PC and performs a
155                    // writeback, ensure the pc load/branch is the last uop.
156                    // Load into a temp reg here.
157                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
158                            copy_base ? INTREG_UREG0 : rn, up, addr);
159                } else if (reg_idx == INTREG_PC && exception_ret) {
160                    // Special handling for exception return
161                    *uop = new MicroLdrRetUop(machInst, reg_idx,
162                            copy_base ? INTREG_UREG0 : rn, up, addr);
163                } else {
164                    // standard single load uop
165                    *uop = new MicroLdrUop(machInst, reg_idx,
166                            copy_base ? INTREG_UREG0 : rn, up, addr);
167                }
168
169                // Loading pc as last operation?  Set appropriate flags.
170                if (!writeback && reg_idx == INTREG_PC) {
171                    (*uop)->setFlag(StaticInst::IsControl);
172                    (*uop)->setFlag(StaticInst::IsIndirectControl);
173
174                    if (!(condCode == COND_AL || condCode == COND_UC))
175                        (*uop)->setFlag(StaticInst::IsCondControl);
176                    else
177                        (*uop)->setFlag(StaticInst::IsUncondControl);
178                }
179            } else {
180                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
181            }
182
183            if (up) addr += 4;
184            else addr -= 4;
185            --mem_ops;
186        }
187
188        // Load/store micro-op generated, go to next uop
189        ++uop;
190    }
191
192    if (writeback && ones) {
193        // Perform writeback uop operation
194        if (up)
195            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
196        else
197            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
198
199        // Write PC after address writeback?
200        if (pc_temp) {
201            if (exception_ret) {
202                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
203            } else {
204                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
205            }
206            (*uop)->setFlag(StaticInst::IsControl);
207            (*uop)->setFlag(StaticInst::IsIndirectControl);
208
209            if (!(condCode == COND_AL || condCode == COND_UC))
210                (*uop)->setFlag(StaticInst::IsCondControl);
211            else
212                (*uop)->setFlag(StaticInst::IsUncondControl);
213
214            if (rn == INTREG_SP)
215                (*uop)->setFlag(StaticInst::IsReturn);
216
217            ++uop;
218        }
219    }
220
221    --uop;
222    (*uop)->setLastMicroop();
223    microOps[0]->setFirstMicroop();
224
225    /* Take the control flags from the last microop for the macroop */
226    if ((*uop)->isControl())
227        setFlag(StaticInst::IsControl);
228    if ((*uop)->isCondCtrl())
229        setFlag(StaticInst::IsCondControl);
230    if ((*uop)->isUncondCtrl())
231        setFlag(StaticInst::IsUncondControl);
232    if ((*uop)->isIndirectCtrl())
233        setFlag(StaticInst::IsIndirectControl);
234    if ((*uop)->isReturn())
235        setFlag(StaticInst::IsReturn);
236
237    for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
238        (*uop)->setDelayedCommit();
239    }
240}
241
242PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
243                     uint32_t size, bool fp, bool load, bool noAlloc,
244                     bool signExt, bool exclusive, bool acrel,
245                     int64_t imm, AddrMode mode,
246                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
247    PredMacroOp(mnem, machInst, __opClass)
248{
249    bool post = (mode == AddrMd_PostIndex);
250    bool writeback = (mode != AddrMd_Offset);
251
252    if (load) {
253        // Use integer rounding to round up loads of size 4
254        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
255    } else {
256        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
257    }
258    microOps = new StaticInstPtr[numMicroops];
259
260    StaticInstPtr *uop = microOps;
261
262    rn = makeSP(rn);
263
264    if (!post) {
265        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
266                post ? 0 : imm);
267    }
268
269    if (fp) {
270        if (size == 16) {
271            if (load) {
272                *uop++ = new MicroLdFp16Uop(machInst, rt,
273                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
274                *uop++ = new MicroLdFp16Uop(machInst, rt2,
275                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
276            } else {
277                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
278                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
280                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
281                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
282                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
284                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
285            }
286        } else if (size == 8) {
287            if (load) {
288                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
289                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
290            } else {
291                *uop++ = new MicroStrFpXImmUop(machInst, rt,
292                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
293                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
294                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
295            }
296        } else if (size == 4) {
297            if (load) {
298                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
299                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
300            } else {
301                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
302                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
303            }
304        }
305    } else {
306        if (size == 8) {
307            if (load) {
308                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
309                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
310            } else {
311                *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
312                        0, noAlloc, exclusive, acrel);
313                *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
314                        size, noAlloc, exclusive, acrel);
315            }
316        } else if (size == 4) {
317            if (load) {
318                if (signExt) {
319                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
320                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
321                } else {
322                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
323                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
324                }
325            } else {
326                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
327                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
328            }
329        }
330    }
331
332    if (writeback) {
333        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
334                                   post ? imm : 0);
335    }
336
337    assert(uop == &microOps[numMicroops]);
338    (*--uop)->setLastMicroop();
339    microOps[0]->setFirstMicroop();
340
341    for (StaticInstPtr *curUop = microOps;
342            !(*curUop)->isLastMicroop(); curUop++) {
343        (*curUop)->setDelayedCommit();
344    }
345}
346
347BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
348                             OpClass __opClass, bool load, IntRegIndex dest,
349                             IntRegIndex base, int64_t imm) :
350    PredMacroOp(mnem, machInst, __opClass)
351{
352    numMicroops = load ? 1 : 2;
353    microOps = new StaticInstPtr[numMicroops];
354
355    StaticInstPtr *uop = microOps;
356
357    if (load) {
358        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
359    } else {
360        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
361        (*uop)->setDelayedCommit();
362        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
363    }
364    (*uop)->setLastMicroop();
365    microOps[0]->setFirstMicroop();
366}
367
368BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
369                               OpClass __opClass, bool load, IntRegIndex dest,
370                               IntRegIndex base, int64_t imm) :
371    PredMacroOp(mnem, machInst, __opClass)
372{
373    numMicroops = load ? 2 : 3;
374    microOps = new StaticInstPtr[numMicroops];
375
376    StaticInstPtr *uop = microOps;
377
378    if (load) {
379        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
380    } else {
381        *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
382        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
383    }
384    *uop = new MicroAddXiUop(machInst, base, base, imm);
385    (*uop)->setLastMicroop();
386    microOps[0]->setFirstMicroop();
387
388    for (StaticInstPtr *curUop = microOps;
389            !(*curUop)->isLastMicroop(); curUop++) {
390        (*curUop)->setDelayedCommit();
391    }
392}
393
394BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
395                             OpClass __opClass, bool load, IntRegIndex dest,
396                             IntRegIndex base, int64_t imm) :
397    PredMacroOp(mnem, machInst, __opClass)
398{
399    numMicroops = load ? 2 : 3;
400    microOps = new StaticInstPtr[numMicroops];
401
402    StaticInstPtr *uop = microOps;
403
404    if (load) {
405        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
406    } else {
407        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
408        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
409    }
410    *uop = new MicroAddXiUop(machInst, base, base, imm);
411    (*uop)->setLastMicroop();
412    microOps[0]->setFirstMicroop();
413
414    for (StaticInstPtr *curUop = microOps;
415            !(*curUop)->isLastMicroop(); curUop++) {
416        (*curUop)->setDelayedCommit();
417    }
418}
419
420BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
421                             OpClass __opClass, bool load, IntRegIndex dest,
422                             IntRegIndex base, IntRegIndex offset,
423                             ArmExtendType type, int64_t imm) :
424    PredMacroOp(mnem, machInst, __opClass)
425{
426    numMicroops = load ? 1 : 2;
427    microOps = new StaticInstPtr[numMicroops];
428
429    StaticInstPtr *uop = microOps;
430
431    if (load) {
432        *uop = new MicroLdFp16RegUop(machInst, dest, base,
433                                  offset, type, imm);
434    } else {
435        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
436                                       offset, type, imm);
437        (*uop)->setDelayedCommit();
438        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
439                                         offset, type, imm);
440    }
441
442    (*uop)->setLastMicroop();
443    microOps[0]->setFirstMicroop();
444}
445
446BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
447                             OpClass __opClass, IntRegIndex dest,
448                             int64_t imm) :
449    PredMacroOp(mnem, machInst, __opClass)
450{
451    numMicroops = 1;
452    microOps = new StaticInstPtr[numMicroops];
453
454    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
455    microOps[0]->setLastMicroop();
456    microOps[0]->setFirstMicroop();
457}
458
459VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
460                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
461                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
462    PredMacroOp(mnem, machInst, __opClass)
463{
464    assert(regs > 0 && regs <= 4);
465    assert(regs % elems == 0);
466
467    numMicroops = (regs > 2) ? 2 : 1;
468    bool wb = (rm != 15);
469    bool deinterleave = (elems > 1);
470
471    if (wb) numMicroops++;
472    if (deinterleave) numMicroops += (regs / elems);
473    microOps = new StaticInstPtr[numMicroops];
474
475    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
476
477    uint32_t noAlign = TLB::MustBeOne;
478
479    unsigned uopIdx = 0;
480    switch (regs) {
481      case 4:
482        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483                size, machInst, rMid, rn, 0, align);
484        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
485                size, machInst, rMid + 4, rn, 16, noAlign);
486        break;
487      case 3:
488        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
489                size, machInst, rMid, rn, 0, align);
490        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
491                size, machInst, rMid + 4, rn, 16, noAlign);
492        break;
493      case 2:
494        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
495                size, machInst, rMid, rn, 0, align);
496        break;
497      case 1:
498        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
499                size, machInst, rMid, rn, 0, align);
500        break;
501      default:
502        // Unknown number of registers
503        microOps[uopIdx++] = new Unknown(machInst);
504    }
505    if (wb) {
506        if (rm != 15 && rm != 13) {
507            microOps[uopIdx++] =
508                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
509        } else {
510            microOps[uopIdx++] =
511                new MicroAddiUop(machInst, rn, rn, regs * 8);
512        }
513    }
514    if (deinterleave) {
515        switch (elems) {
516          case 4:
517            assert(regs == 4);
518            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
519                    size, machInst, vd * 2, rMid, inc * 2);
520            break;
521          case 3:
522            assert(regs == 3);
523            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
524                    size, machInst, vd * 2, rMid, inc * 2);
525            break;
526          case 2:
527            assert(regs == 4 || regs == 2);
528            if (regs == 4) {
529                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530                        size, machInst, vd * 2, rMid, inc * 2);
531                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
532                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
533            } else {
534                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
535                        size, machInst, vd * 2, rMid, inc * 2);
536            }
537            break;
538          default:
539            // Bad number of elements to deinterleave
540            microOps[uopIdx++] = new Unknown(machInst);
541        }
542    }
543    assert(uopIdx == numMicroops);
544
545    for (unsigned i = 0; i < numMicroops - 1; i++) {
546        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
547        assert(uopPtr);
548        uopPtr->setDelayedCommit();
549    }
550    microOps[0]->setFirstMicroop();
551    microOps[numMicroops - 1]->setLastMicroop();
552}
553
554VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
555                         OpClass __opClass, bool all, unsigned elems,
556                         RegIndex rn, RegIndex vd, unsigned regs,
557                         unsigned inc, uint32_t size, uint32_t align,
558                         RegIndex rm, unsigned lane) :
559    PredMacroOp(mnem, machInst, __opClass)
560{
561    assert(regs > 0 && regs <= 4);
562    assert(regs % elems == 0);
563
564    unsigned eBytes = (1 << size);
565    unsigned loadSize = eBytes * elems;
566    unsigned loadRegs M5_VAR_USED =
567        (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
568
569    assert(loadRegs > 0 && loadRegs <= 4);
570
571    numMicroops = 1;
572    bool wb = (rm != 15);
573
574    if (wb) numMicroops++;
575    numMicroops += (regs / elems);
576    microOps = new StaticInstPtr[numMicroops];
577
578    RegIndex ufp0 = NumFloatV7ArchRegs;
579
580    unsigned uopIdx = 0;
581    switch (loadSize) {
582      case 1:
583        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
584                machInst, ufp0, rn, 0, align);
585        break;
586      case 2:
587        if (eBytes == 2) {
588            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
589                    machInst, ufp0, rn, 0, align);
590        } else {
591            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
592                    machInst, ufp0, rn, 0, align);
593        }
594        break;
595      case 3:
596        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
597                machInst, ufp0, rn, 0, align);
598        break;
599      case 4:
600        switch (eBytes) {
601          case 1:
602            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
603                    machInst, ufp0, rn, 0, align);
604            break;
605          case 2:
606            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
607                    machInst, ufp0, rn, 0, align);
608            break;
609          case 4:
610            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
611                    machInst, ufp0, rn, 0, align);
612            break;
613        }
614        break;
615      case 6:
616        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
617                machInst, ufp0, rn, 0, align);
618        break;
619      case 8:
620        switch (eBytes) {
621          case 2:
622            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
623                    machInst, ufp0, rn, 0, align);
624            break;
625          case 4:
626            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
627                    machInst, ufp0, rn, 0, align);
628            break;
629        }
630        break;
631      case 12:
632        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
633                machInst, ufp0, rn, 0, align);
634        break;
635      case 16:
636        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
637                machInst, ufp0, rn, 0, align);
638        break;
639      default:
640        // Unrecognized load size
641        microOps[uopIdx++] = new Unknown(machInst);
642    }
643    if (wb) {
644        if (rm != 15 && rm != 13) {
645            microOps[uopIdx++] =
646                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
647        } else {
648            microOps[uopIdx++] =
649                new MicroAddiUop(machInst, rn, rn, loadSize);
650        }
651    }
652    switch (elems) {
653      case 4:
654        assert(regs == 4);
655        switch (size) {
656          case 0:
657            if (all) {
658                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
659                        machInst, vd * 2, ufp0, inc * 2);
660            } else {
661                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
662                        machInst, vd * 2, ufp0, inc * 2, lane);
663            }
664            break;
665          case 1:
666            if (all) {
667                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
668                        machInst, vd * 2, ufp0, inc * 2);
669            } else {
670                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
671                        machInst, vd * 2, ufp0, inc * 2, lane);
672            }
673            break;
674          case 2:
675            if (all) {
676                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
677                        machInst, vd * 2, ufp0, inc * 2);
678            } else {
679                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
680                        machInst, vd * 2, ufp0, inc * 2, lane);
681            }
682            break;
683          default:
684            // Bad size
685            microOps[uopIdx++] = new Unknown(machInst);
686            break;
687        }
688        break;
689      case 3:
690        assert(regs == 3);
691        switch (size) {
692          case 0:
693            if (all) {
694                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
695                        machInst, vd * 2, ufp0, inc * 2);
696            } else {
697                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
698                        machInst, vd * 2, ufp0, inc * 2, lane);
699            }
700            break;
701          case 1:
702            if (all) {
703                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
704                        machInst, vd * 2, ufp0, inc * 2);
705            } else {
706                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
707                        machInst, vd * 2, ufp0, inc * 2, lane);
708            }
709            break;
710          case 2:
711            if (all) {
712                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
713                        machInst, vd * 2, ufp0, inc * 2);
714            } else {
715                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
716                        machInst, vd * 2, ufp0, inc * 2, lane);
717            }
718            break;
719          default:
720            // Bad size
721            microOps[uopIdx++] = new Unknown(machInst);
722            break;
723        }
724        break;
725      case 2:
726        assert(regs == 2);
727        assert(loadRegs <= 2);
728        switch (size) {
729          case 0:
730            if (all) {
731                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
732                        machInst, vd * 2, ufp0, inc * 2);
733            } else {
734                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
735                        machInst, vd * 2, ufp0, inc * 2, lane);
736            }
737            break;
738          case 1:
739            if (all) {
740                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
741                        machInst, vd * 2, ufp0, inc * 2);
742            } else {
743                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
744                        machInst, vd * 2, ufp0, inc * 2, lane);
745            }
746            break;
747          case 2:
748            if (all) {
749                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
750                        machInst, vd * 2, ufp0, inc * 2);
751            } else {
752                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
753                        machInst, vd * 2, ufp0, inc * 2, lane);
754            }
755            break;
756          default:
757            // Bad size
758            microOps[uopIdx++] = new Unknown(machInst);
759            break;
760        }
761        break;
762      case 1:
763        assert(regs == 1 || (all && regs == 2));
764        assert(loadRegs <= 2);
765        for (unsigned offset = 0; offset < regs; offset++) {
766            switch (size) {
767              case 0:
768                if (all) {
769                    microOps[uopIdx++] =
770                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
771                            machInst, (vd + offset) * 2, ufp0, inc * 2);
772                } else {
773                    microOps[uopIdx++] =
774                        new MicroUnpackNeon2to2Uop<uint8_t>(
775                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
776                }
777                break;
778              case 1:
779                if (all) {
780                    microOps[uopIdx++] =
781                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
782                            machInst, (vd + offset) * 2, ufp0, inc * 2);
783                } else {
784                    microOps[uopIdx++] =
785                        new MicroUnpackNeon2to2Uop<uint16_t>(
786                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
787                }
788                break;
789              case 2:
790                if (all) {
791                    microOps[uopIdx++] =
792                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
793                            machInst, (vd + offset) * 2, ufp0, inc * 2);
794                } else {
795                    microOps[uopIdx++] =
796                        new MicroUnpackNeon2to2Uop<uint32_t>(
797                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
798                }
799                break;
800              default:
801                // Bad size
802                microOps[uopIdx++] = new Unknown(machInst);
803                break;
804            }
805        }
806        break;
807      default:
808        // Bad number of elements to unpack
809        microOps[uopIdx++] = new Unknown(machInst);
810    }
811    assert(uopIdx == numMicroops);
812
813    for (unsigned i = 0; i < numMicroops - 1; i++) {
814        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
815        assert(uopPtr);
816        uopPtr->setDelayedCommit();
817    }
818    microOps[0]->setFirstMicroop();
819    microOps[numMicroops - 1]->setLastMicroop();
820}
821
822VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
823                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
824                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
825    PredMacroOp(mnem, machInst, __opClass)
826{
827    assert(regs > 0 && regs <= 4);
828    assert(regs % elems == 0);
829
830    numMicroops = (regs > 2) ? 2 : 1;
831    bool wb = (rm != 15);
832    bool interleave = (elems > 1);
833
834    if (wb) numMicroops++;
835    if (interleave) numMicroops += (regs / elems);
836    microOps = new StaticInstPtr[numMicroops];
837
838    uint32_t noAlign = TLB::MustBeOne;
839
840    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
841
842    unsigned uopIdx = 0;
843    if (interleave) {
844        switch (elems) {
845          case 4:
846            assert(regs == 4);
847            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
848                    size, machInst, rMid, vd * 2, inc * 2);
849            break;
850          case 3:
851            assert(regs == 3);
852            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
853                    size, machInst, rMid, vd * 2, inc * 2);
854            break;
855          case 2:
856            assert(regs == 4 || regs == 2);
857            if (regs == 4) {
858                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859                        size, machInst, rMid, vd * 2, inc * 2);
860                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
861                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
862            } else {
863                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
864                        size, machInst, rMid, vd * 2, inc * 2);
865            }
866            break;
867          default:
868            // Bad number of elements to interleave
869            microOps[uopIdx++] = new Unknown(machInst);
870        }
871    }
872    switch (regs) {
873      case 4:
874        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875                size, machInst, rMid, rn, 0, align);
876        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
877                size, machInst, rMid + 4, rn, 16, noAlign);
878        break;
879      case 3:
880        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
881                size, machInst, rMid, rn, 0, align);
882        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
883                size, machInst, rMid + 4, rn, 16, noAlign);
884        break;
885      case 2:
886        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
887                size, machInst, rMid, rn, 0, align);
888        break;
889      case 1:
890        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
891                size, machInst, rMid, rn, 0, align);
892        break;
893      default:
894        // Unknown number of registers
895        microOps[uopIdx++] = new Unknown(machInst);
896    }
897    if (wb) {
898        if (rm != 15 && rm != 13) {
899            microOps[uopIdx++] =
900                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
901        } else {
902            microOps[uopIdx++] =
903                new MicroAddiUop(machInst, rn, rn, regs * 8);
904        }
905    }
906    assert(uopIdx == numMicroops);
907
908    for (unsigned i = 0; i < numMicroops - 1; i++) {
909        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
910        assert(uopPtr);
911        uopPtr->setDelayedCommit();
912    }
913    microOps[0]->setFirstMicroop();
914    microOps[numMicroops - 1]->setLastMicroop();
915}
916
917VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
918                         OpClass __opClass, bool all, unsigned elems,
919                         RegIndex rn, RegIndex vd, unsigned regs,
920                         unsigned inc, uint32_t size, uint32_t align,
921                         RegIndex rm, unsigned lane) :
922    PredMacroOp(mnem, machInst, __opClass)
923{
924    assert(!all);
925    assert(regs > 0 && regs <= 4);
926    assert(regs % elems == 0);
927
928    unsigned eBytes = (1 << size);
929    unsigned storeSize = eBytes * elems;
930    unsigned storeRegs M5_VAR_USED =
931        (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
932
933    assert(storeRegs > 0 && storeRegs <= 4);
934
935    numMicroops = 1;
936    bool wb = (rm != 15);
937
938    if (wb) numMicroops++;
939    numMicroops += (regs / elems);
940    microOps = new StaticInstPtr[numMicroops];
941
942    RegIndex ufp0 = NumFloatV7ArchRegs;
943
944    unsigned uopIdx = 0;
945    switch (elems) {
946      case 4:
947        assert(regs == 4);
948        switch (size) {
949          case 0:
950            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
951                    machInst, ufp0, vd * 2, inc * 2, lane);
952            break;
953          case 1:
954            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
955                    machInst, ufp0, vd * 2, inc * 2, lane);
956            break;
957          case 2:
958            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
959                    machInst, ufp0, vd * 2, inc * 2, lane);
960            break;
961          default:
962            // Bad size
963            microOps[uopIdx++] = new Unknown(machInst);
964            break;
965        }
966        break;
967      case 3:
968        assert(regs == 3);
969        switch (size) {
970          case 0:
971            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
972                    machInst, ufp0, vd * 2, inc * 2, lane);
973            break;
974          case 1:
975            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
976                    machInst, ufp0, vd * 2, inc * 2, lane);
977            break;
978          case 2:
979            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
980                    machInst, ufp0, vd * 2, inc * 2, lane);
981            break;
982          default:
983            // Bad size
984            microOps[uopIdx++] = new Unknown(machInst);
985            break;
986        }
987        break;
988      case 2:
989        assert(regs == 2);
990        assert(storeRegs <= 2);
991        switch (size) {
992          case 0:
993            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
994                    machInst, ufp0, vd * 2, inc * 2, lane);
995            break;
996          case 1:
997            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
998                    machInst, ufp0, vd * 2, inc * 2, lane);
999            break;
1000          case 2:
1001            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1002                    machInst, ufp0, vd * 2, inc * 2, lane);
1003            break;
1004          default:
1005            // Bad size
1006            microOps[uopIdx++] = new Unknown(machInst);
1007            break;
1008        }
1009        break;
1010      case 1:
1011        assert(regs == 1 || (all && regs == 2));
1012        assert(storeRegs <= 2);
1013        for (unsigned offset = 0; offset < regs; offset++) {
1014            switch (size) {
1015              case 0:
1016                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1017                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1018                break;
1019              case 1:
1020                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1021                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1022                break;
1023              case 2:
1024                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1025                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1026                break;
1027              default:
1028                // Bad size
1029                microOps[uopIdx++] = new Unknown(machInst);
1030                break;
1031            }
1032        }
1033        break;
1034      default:
1035        // Bad number of elements to unpack
1036        microOps[uopIdx++] = new Unknown(machInst);
1037    }
1038    switch (storeSize) {
1039      case 1:
1040        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1041                machInst, ufp0, rn, 0, align);
1042        break;
1043      case 2:
1044        if (eBytes == 2) {
1045            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1046                    machInst, ufp0, rn, 0, align);
1047        } else {
1048            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1049                    machInst, ufp0, rn, 0, align);
1050        }
1051        break;
1052      case 3:
1053        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1054                machInst, ufp0, rn, 0, align);
1055        break;
1056      case 4:
1057        switch (eBytes) {
1058          case 1:
1059            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1060                    machInst, ufp0, rn, 0, align);
1061            break;
1062          case 2:
1063            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1064                    machInst, ufp0, rn, 0, align);
1065            break;
1066          case 4:
1067            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1068                    machInst, ufp0, rn, 0, align);
1069            break;
1070        }
1071        break;
1072      case 6:
1073        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1074                machInst, ufp0, rn, 0, align);
1075        break;
1076      case 8:
1077        switch (eBytes) {
1078          case 2:
1079            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1080                    machInst, ufp0, rn, 0, align);
1081            break;
1082          case 4:
1083            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1084                    machInst, ufp0, rn, 0, align);
1085            break;
1086        }
1087        break;
1088      case 12:
1089        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1090                machInst, ufp0, rn, 0, align);
1091        break;
1092      case 16:
1093        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1094                machInst, ufp0, rn, 0, align);
1095        break;
1096      default:
1097        // Bad store size
1098        microOps[uopIdx++] = new Unknown(machInst);
1099    }
1100    if (wb) {
1101        if (rm != 15 && rm != 13) {
1102            microOps[uopIdx++] =
1103                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1104        } else {
1105            microOps[uopIdx++] =
1106                new MicroAddiUop(machInst, rn, rn, storeSize);
1107        }
1108    }
1109    assert(uopIdx == numMicroops);
1110
1111    for (unsigned i = 0; i < numMicroops - 1; i++) {
1112        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1113        assert(uopPtr);
1114        uopPtr->setDelayedCommit();
1115    }
1116    microOps[0]->setFirstMicroop();
1117    microOps[numMicroops - 1]->setLastMicroop();
1118}
1119
1120VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1121                         OpClass __opClass, RegIndex rn, RegIndex vd,
1122                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1123                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1124    PredMacroOp(mnem, machInst, __opClass)
1125{
1126    RegIndex vx = NumFloatV8ArchRegs / 4;
1127    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1128    bool baseIsSP = isSP((IntRegIndex) rnsp);
1129
1130    numMicroops = wb ? 1 : 0;
1131
1132    int totNumBytes = numRegs * dataSize / 8;
1133    assert(totNumBytes <= 64);
1134
1135    // The guiding principle here is that no more than 16 bytes can be
1136    // transferred at a time
1137    int numMemMicroops = totNumBytes / 16;
1138    int residuum = totNumBytes % 16;
1139    if (residuum)
1140        ++numMemMicroops;
1141    numMicroops += numMemMicroops;
1142
1143    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1144    numMicroops += numMarshalMicroops;
1145
1146    microOps = new StaticInstPtr[numMicroops];
1147    unsigned uopIdx = 0;
1148    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149        TLB::AllowUnaligned;
1150
1151    int i = 0;
1152    for (; i < numMemMicroops - 1; ++i) {
1153        microOps[uopIdx++] = new MicroNeonLoad64(
1154            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155            baseIsSP, 16 /* accSize */, eSize);
1156    }
1157    microOps[uopIdx++] =  new MicroNeonLoad64(
1158        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159        residuum ? residuum : 16 /* accSize */, eSize);
1160
1161    // Writeback microop: the post-increment amount is encoded in "Rm": a
1162    // 64-bit general register OR as '11111' for an immediate value equal to
1163    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164    if (wb) {
1165        if (rm != ((RegIndex) INTREG_X31)) {
1166            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167                                                      UXTX, 0);
1168        } else {
1169            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170                                                   totNumBytes);
1171        }
1172    }
1173
1174    for (int i = 0; i < numMarshalMicroops; ++i) {
1175        switch(numRegs) {
1176            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1177                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1178                        numStructElems, 1, i /* step */);
1179                    break;
1180            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1181                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1182                        numStructElems, 2, i /* step */);
1183                    break;
1184            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1185                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1186                        numStructElems, 3, i /* step */);
1187                    break;
1188            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1189                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1190                        numStructElems, 4, i /* step */);
1191                    break;
1192            default: panic("Invalid number of registers");
1193        }
1194
1195    }
1196
1197    assert(uopIdx == numMicroops);
1198
1199    for (int i = 0; i < numMicroops - 1; ++i) {
1200        microOps[i]->setDelayedCommit();
1201    }
1202    microOps[numMicroops - 1]->setLastMicroop();
1203}
1204
1205VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1206                         OpClass __opClass, RegIndex rn, RegIndex vd,
1207                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
1208                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
1209    PredMacroOp(mnem, machInst, __opClass)
1210{
1211    RegIndex vx = NumFloatV8ArchRegs / 4;
1212    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1213    bool baseIsSP = isSP((IntRegIndex) rnsp);
1214
1215    numMicroops = wb ? 1 : 0;
1216
1217    int totNumBytes = numRegs * dataSize / 8;
1218    assert(totNumBytes <= 64);
1219
1220    // The guiding principle here is that no more than 16 bytes can be
1221    // transferred at a time
1222    int numMemMicroops = totNumBytes / 16;
1223    int residuum = totNumBytes % 16;
1224    if (residuum)
1225        ++numMemMicroops;
1226    numMicroops += numMemMicroops;
1227
1228    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1229    numMicroops += numMarshalMicroops;
1230
1231    microOps = new StaticInstPtr[numMicroops];
1232    unsigned uopIdx = 0;
1233
1234    for (int i = 0; i < numMarshalMicroops; ++i) {
1235        switch (numRegs) {
1236            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1237                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1238                        numStructElems, 1, i /* step */);
1239                    break;
1240            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1241                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1242                        numStructElems, 2, i /* step */);
1243                    break;
1244            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1245                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1246                        numStructElems, 3, i /* step */);
1247                    break;
1248            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1249                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1250                        numStructElems, 4, i /* step */);
1251                    break;
1252            default: panic("Invalid number of registers");
1253        }
1254    }
1255
1256    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1257        TLB::AllowUnaligned;
1258
1259    int i = 0;
1260    for (; i < numMemMicroops - 1; ++i) {
1261        microOps[uopIdx++] = new MicroNeonStore64(
1262            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1263            baseIsSP, 16 /* accSize */, eSize);
1264    }
1265    microOps[uopIdx++] = new MicroNeonStore64(
1266        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1267        residuum ? residuum : 16 /* accSize */, eSize);
1268
1269    // Writeback microop: the post-increment amount is encoded in "Rm": a
1270    // 64-bit general register OR as '11111' for an immediate value equal to
1271    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1272    if (wb) {
1273        if (rm != ((RegIndex) INTREG_X31)) {
1274            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1275                                                      UXTX, 0);
1276        } else {
1277            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1278                                                   totNumBytes);
1279        }
1280    }
1281
1282    assert(uopIdx == numMicroops);
1283
1284    for (int i = 0; i < numMicroops - 1; i++) {
1285        microOps[i]->setDelayedCommit();
1286    }
1287    microOps[numMicroops - 1]->setLastMicroop();
1288}
1289
1290VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1291                             OpClass __opClass, RegIndex rn, RegIndex vd,
1292                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1293                             uint8_t numStructElems, uint8_t index, bool wb,
1294                             bool replicate) :
1295    PredMacroOp(mnem, machInst, __opClass),
1296    eSize(0), dataSize(0), numStructElems(0), index(0),
1297    wb(false), replicate(false)
1298
1299{
1300    RegIndex vx = NumFloatV8ArchRegs / 4;
1301    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1302    bool baseIsSP = isSP((IntRegIndex) rnsp);
1303
1304    numMicroops = wb ? 1 : 0;
1305
1306    int eSizeBytes = 1 << eSize;
1307    int totNumBytes = numStructElems * eSizeBytes;
1308    assert(totNumBytes <= 64);
1309
1310    // The guiding principle here is that no more than 16 bytes can be
1311    // transferred at a time
1312    int numMemMicroops = totNumBytes / 16;
1313    int residuum = totNumBytes % 16;
1314    if (residuum)
1315        ++numMemMicroops;
1316    numMicroops += numMemMicroops;
1317
1318    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1319    numMicroops += numMarshalMicroops;
1320
1321    microOps = new StaticInstPtr[numMicroops];
1322    unsigned uopIdx = 0;
1323
1324    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1325        TLB::AllowUnaligned;
1326
1327    int i = 0;
1328    for (; i < numMemMicroops - 1; ++i) {
1329        microOps[uopIdx++] = new MicroNeonLoad64(
1330            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1331            baseIsSP, 16 /* accSize */, eSize);
1332    }
1333    microOps[uopIdx++] = new MicroNeonLoad64(
1334        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1335        residuum ? residuum : 16 /* accSize */, eSize);
1336
1337    // Writeback microop: the post-increment amount is encoded in "Rm": a
1338    // 64-bit general register OR as '11111' for an immediate value equal to
1339    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1340    if (wb) {
1341        if (rm != ((RegIndex) INTREG_X31)) {
1342            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1343                                                      UXTX, 0);
1344        } else {
1345            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1346                                                   totNumBytes);
1347        }
1348    }
1349
1350    for (int i = 0; i < numMarshalMicroops; ++i) {
1351        microOps[uopIdx++] = new MicroUnpackNeon64(
1352            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1353            numStructElems, index, i /* step */, replicate);
1354    }
1355
1356    assert(uopIdx == numMicroops);
1357
1358    for (int i = 0; i < numMicroops - 1; i++) {
1359        microOps[i]->setDelayedCommit();
1360    }
1361    microOps[numMicroops - 1]->setLastMicroop();
1362}
1363
1364VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1365                             OpClass __opClass, RegIndex rn, RegIndex vd,
1366                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
1367                             uint8_t numStructElems, uint8_t index, bool wb,
1368                             bool replicate) :
1369    PredMacroOp(mnem, machInst, __opClass),
1370    eSize(0), dataSize(0), numStructElems(0), index(0),
1371    wb(false), replicate(false)
1372{
1373    RegIndex vx = NumFloatV8ArchRegs / 4;
1374    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1375    bool baseIsSP = isSP((IntRegIndex) rnsp);
1376
1377    numMicroops = wb ? 1 : 0;
1378
1379    int eSizeBytes = 1 << eSize;
1380    int totNumBytes = numStructElems * eSizeBytes;
1381    assert(totNumBytes <= 64);
1382
1383    // The guiding principle here is that no more than 16 bytes can be
1384    // transferred at a time
1385    int numMemMicroops = totNumBytes / 16;
1386    int residuum = totNumBytes % 16;
1387    if (residuum)
1388        ++numMemMicroops;
1389    numMicroops += numMemMicroops;
1390
1391    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1392    numMicroops += numMarshalMicroops;
1393
1394    microOps = new StaticInstPtr[numMicroops];
1395    unsigned uopIdx = 0;
1396
1397    for (int i = 0; i < numMarshalMicroops; ++i) {
1398        microOps[uopIdx++] = new MicroPackNeon64(
1399            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1400            numStructElems, index, i /* step */, replicate);
1401    }
1402
1403    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1404        TLB::AllowUnaligned;
1405
1406    int i = 0;
1407    for (; i < numMemMicroops - 1; ++i) {
1408        microOps[uopIdx++] = new MicroNeonStore64(
1409            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1410            baseIsSP, 16 /* accsize */, eSize);
1411    }
1412    microOps[uopIdx++] = new MicroNeonStore64(
1413        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1414        residuum ? residuum : 16 /* accSize */, eSize);
1415
1416    // Writeback microop: the post-increment amount is encoded in "Rm": a
1417    // 64-bit general register OR as '11111' for an immediate value equal to
1418    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1419    if (wb) {
1420        if (rm != ((RegIndex) INTREG_X31)) {
1421            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1422                                                      UXTX, 0);
1423        } else {
1424            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1425                                                   totNumBytes);
1426        }
1427    }
1428
1429    assert(uopIdx == numMicroops);
1430
1431    for (int i = 0; i < numMicroops - 1; i++) {
1432        microOps[i]->setDelayedCommit();
1433    }
1434    microOps[numMicroops - 1]->setLastMicroop();
1435}
1436
1437MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1438                             OpClass __opClass, IntRegIndex rn,
1439                             RegIndex vd, bool single, bool up,
1440                             bool writeback, bool load, uint32_t offset) :
1441    PredMacroOp(mnem, machInst, __opClass)
1442{
1443    int i = 0;
1444
1445    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446    // to be functionally identical except that fldmx is deprecated. For now
1447    // we'll assume they're otherwise interchangable.
1448    int count = (single ? offset : (offset / 2));
1449    if (count == 0 || count > NumFloatV7ArchRegs)
1450        warn_once("Bad offset field for VFP load/store multiple.\n");
1451    if (count == 0) {
1452        // Force there to be at least one microop so the macroop makes sense.
1453        writeback = true;
1454    }
1455    if (count > NumFloatV7ArchRegs)
1456        count = NumFloatV7ArchRegs;
1457
1458    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1459    microOps = new StaticInstPtr[numMicroops];
1460
1461    int64_t addr = 0;
1462
1463    if (!up)
1464        addr = 4 * offset;
1465
1466    bool tempUp = up;
1467    for (int j = 0; j < count; j++) {
1468        if (load) {
1469            if (single) {
1470                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1471                                                  tempUp, addr);
1472            } else {
1473                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1474                                                    tempUp, addr);
1475                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1476                                                    addr + (up ? 4 : -4));
1477            }
1478        } else {
1479            if (single) {
1480                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1481                                                  tempUp, addr);
1482            } else {
1483                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1484                                                    tempUp, addr);
1485                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1486                                                    addr + (up ? 4 : -4));
1487            }
1488        }
1489        if (!tempUp) {
1490            addr -= (single ? 4 : 8);
1491            // The microops don't handle negative displacement, so turn if we
1492            // hit zero, flip polarity and start adding.
1493            if (addr <= 0) {
1494                tempUp = true;
1495                addr = -addr;
1496            }
1497        } else {
1498            addr += (single ? 4 : 8);
1499        }
1500    }
1501
1502    if (writeback) {
1503        if (up) {
1504            microOps[i++] =
1505                new MicroAddiUop(machInst, rn, rn, 4 * offset);
1506        } else {
1507            microOps[i++] =
1508                new MicroSubiUop(machInst, rn, rn, 4 * offset);
1509        }
1510    }
1511
1512    assert(numMicroops == i);
1513    microOps[numMicroops - 1]->setLastMicroop();
1514
1515    for (StaticInstPtr *curUop = microOps;
1516            !(*curUop)->isLastMicroop(); curUop++) {
1517        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1518        assert(uopPtr);
1519        uopPtr->setDelayedCommit();
1520    }
1521}
1522
1523std::string
1524MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1525{
1526    std::stringstream ss;
1527    printMnemonic(ss);
1528    printIntReg(ss, ura);
1529    ss << ", ";
1530    printIntReg(ss, urb);
1531    ss << ", ";
1532    ccprintf(ss, "#%d", imm);
1533    return ss.str();
1534}
1535
1536std::string
1537MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1538{
1539    std::stringstream ss;
1540    printMnemonic(ss);
1541    printIntReg(ss, ura);
1542    ss << ", ";
1543    printIntReg(ss, urb);
1544    ss << ", ";
1545    ccprintf(ss, "#%d", imm);
1546    return ss.str();
1547}
1548
1549std::string
1550MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1551{
1552    std::stringstream ss;
1553    printMnemonic(ss);
1554    ss << "[PC,CPSR]";
1555    return ss.str();
1556}
1557
1558std::string
1559MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1560{
1561    std::stringstream ss;
1562    printMnemonic(ss);
1563    printIntReg(ss, ura);
1564    ccprintf(ss, ", ");
1565    printIntReg(ss, urb);
1566    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1567    return ss.str();
1568}
1569
1570std::string
1571MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572{
1573    std::stringstream ss;
1574    printMnemonic(ss);
1575    printIntReg(ss, ura);
1576    ss << ", ";
1577    printIntReg(ss, urb);
1578    return ss.str();
1579}
1580
1581std::string
1582MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1583{
1584    std::stringstream ss;
1585    printMnemonic(ss);
1586    printIntReg(ss, ura);
1587    ss << ", ";
1588    printIntReg(ss, urb);
1589    ss << ", ";
1590    printIntReg(ss, urc);
1591    return ss.str();
1592}
1593
1594std::string
1595MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1596{
1597    std::stringstream ss;
1598    printMnemonic(ss);
1599    if (isFloating())
1600        printFloatReg(ss, ura);
1601    else
1602        printIntReg(ss, ura);
1603    ss << ", [";
1604    printIntReg(ss, urb);
1605    ss << ", ";
1606    ccprintf(ss, "#%d", imm);
1607    ss << "]";
1608    return ss.str();
1609}
1610
1611std::string
1612MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1613{
1614    std::stringstream ss;
1615    printMnemonic(ss);
1616    printIntReg(ss, dest);
1617    ss << ",";
1618    printIntReg(ss, dest2);
1619    ss << ", [";
1620    printIntReg(ss, urb);
1621    ss << ", ";
1622    ccprintf(ss, "#%d", imm);
1623    ss << "]";
1624    return ss.str();
1625}
1626
1627}
1628