/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Stephen Hines
 */

#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"

using namespace std;
using namespace ArmISAInst;

namespace ArmISA
{

MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    uint32_t mem_ops = ones;

    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something)
    bool copy_base = (bits(reglist, rn) && load) || !ones;
    bool force_user = user && !bits(reglist, 15);
    bool exception_ret = user && bits(reglist, 15);
    bool pc_temp = load && writeback && bits(reglist, 15);

    if (!ones) {
        numMicroops = 1;
    } else if (load) {
        numMicroops = ((ones + 1) / 2)
                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
                    + (copy_base ? 1 : 0)
                    + (writeback ? 1 : 0)
                    + (pc_temp ? 1 : 0);
    } else {
        numMicroops = ones + (writeback ? 1 : 0);
    }
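    // Example: a load of {r1, r2, r3} with writeback (and a base register
    // that is not in the list) has ones == 3, so numMicroops is
    // (3 + 1) / 2 + 1 == 3: two paired/single loads plus the writeback.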

    microOps = new StaticInstPtr[numMicroops];

    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    if (!index)
        addr += 4;
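    // At this point addr holds the magnitude of the first transfer's
    // offset from the base register; the 'up' flag passed to each memory
    // micro-op below selects whether that offset is added to or
    // subtracted from the base.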

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    if (copy_base)
        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);

    unsigned reg = 0;
    while (mem_ops != 0) {
        // Do load operations in pairs if possible
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
            // 64-bit memory operation
            // Find 2 set register bits (clear them after finding)
            unsigned reg_idx1;
            unsigned reg_idx2;

            // Find the first register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Find the second register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Load into temp reg if necessary
            if (reg_idx2 == INTREG_PC && pc_temp)
                reg_idx2 = INTREG_UREG1;

            // Actually load both registers from memory
            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
                    copy_base ? INTREG_UREG0 : rn, up, addr);

            if (!writeback && reg_idx2 == INTREG_PC) {
                // No writeback if idx == pc; set the appropriate flags
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);

                if (!(condCode == COND_AL || condCode == COND_UC))
                    (*uop)->setFlag(StaticInst::IsCondControl);
                else
                    (*uop)->setFlag(StaticInst::IsUncondControl);
            }

            if (up) addr += 8;
            else addr -= 8;
            mem_ops -= 2;
        } else {
            // 32-bit memory operation
            // Find register for operation
            unsigned reg_idx;
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;

            if (load) {
                if (writeback && reg_idx == INTREG_PC) {
                    // If this instruction changes the PC and performs a
                    // writeback, ensure the pc load/branch is the last uop.
                    // Load into a temp reg here.
                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else if (reg_idx == INTREG_PC && exception_ret) {
                    // Special handling for exception return
                    *uop = new MicroLdrRetUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else {
                    // Standard single load uop
                    *uop = new MicroLdrUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                }

                // Loading pc as last operation? Set appropriate flags.
                if (!writeback && reg_idx == INTREG_PC) {
                    (*uop)->setFlag(StaticInst::IsControl);
                    (*uop)->setFlag(StaticInst::IsIndirectControl);

                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                }
            } else {
                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
            }

            if (up) addr += 4;
            else addr -= 4;
            --mem_ops;
        }

        // Load/store micro-op generated, go to next uop
        ++uop;
    }

    if (writeback && ones) {
        // Perform writeback uop operation
        if (up)
            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // Write PC after address writeback?
        if (pc_temp) {
            if (exception_ret) {
                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            } else {
                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
            }
            (*uop)->setFlag(StaticInst::IsControl);
            (*uop)->setFlag(StaticInst::IsIndirectControl);

            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);

            if (rn == INTREG_SP)
                (*uop)->setFlag(StaticInst::IsReturn);

            ++uop;
        }
    }

    --uop;
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isUncondCtrl())
        setFlag(StaticInst::IsUncondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
        (*uop)->setDelayedCommit();
    }
}

PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    bool post = (mode == AddrMd_PostIndex);
    bool writeback = (mode != AddrMd_Offset);

    if (load) {
        // Use integer rounding to round up loads of size 4
        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
    } else {
        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
    }
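    // For example, a pair of 16-byte registers takes (16 + 4) / 8 == 2
    // load micro-ops, while 8-byte and 4-byte pairs collapse into a
    // single paired-load micro-op: (8 + 4) / 8 == (4 + 4) / 8 == 1.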
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    rn = makeSP(rn);

    if (!post) {
        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
                post ? 0 : imm);
    }

    if (fp) {
        if (size == 16) {
            if (load) {
                *uop++ = new MicroLdFp16Uop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroLdFp16Uop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            }
        } else if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, size, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                if (signExt) {
                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                } else {
                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                }
            } else {
                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    }

    if (writeback) {
        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
                                   post ? imm : 0);
    }

    assert(uop == &microOps[numMicroops]);
    (*--uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, IntRegIndex dest,
                               IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, IntRegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16RegUop(machInst, dest, base,
                                     offset, type, imm);
    } else {
        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
                                       offset, type, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
                                         offset, type, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 1;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
    microOps[0]->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];
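    // Example (assuming a four-register, four-element access such as
    // VLD4 with writeback): two 16-byte load micro-ops, one deinterleave
    // micro-op (regs / elems == 1), and one writeback micro-op.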

    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to pack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
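    // For example, totNumBytes == 24 gives one full 16-byte memory
    // micro-op plus a second micro-op for the 8-byte residuum.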

    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1:
            microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
          case 2:
            microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
          case 3:
            microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
          case 4:
            microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
          default:
            panic("Invalid number of registers");
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1:
            microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
          case 2:
            microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
          case 3:
            microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
          case 4:
            microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
          default:
            panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
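    // Note: the constructor arguments shadow the identically named class
    // members, which stay zero-initialized above; the code below uses the
    // arguments, not the members.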
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
                machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
                machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
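    // e.g. a double-precision transfer with offset == 6 moves count == 3
    // registers using 3 * 2 == 6 transfer micro-ops, plus one more if
    // writeback is enabled.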
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so if we
            // hit zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ccprintf(ss, ", ");
    printReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}

std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    return ss.str();
}

std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    printReg(ss, urc);
    return ss.str();
}

std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printReg(ss, ura + FP_Reg_Base);
    else
        printReg(ss, ura);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

std::string
MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, dest);
    ss << ",";
    printReg(ss, dest2);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57 OpClass __opClass, IntRegIndex rn,
58 bool index, bool up, bool user, bool writeback,
59 bool load, uint32_t reglist) :
60 PredMacroOp(mnem, machInst, __opClass)
61{
62 uint32_t regs = reglist;
63 uint32_t ones = number_of_ones(reglist);
64 uint32_t mem_ops = ones;
65
66 // Copy the base address register if we overwrite it, or if this instruction
67 // is basically a no-op (we have to do something)
68 bool copy_base = (bits(reglist, rn) && load) || !ones;
69 bool force_user = user & !bits(reglist, 15);
70 bool exception_ret = user & bits(reglist, 15);
71 bool pc_temp = load && writeback && bits(reglist, 15);
72
73 if (!ones) {
74 numMicroops = 1;
75 } else if (load) {
76 numMicroops = ((ones + 1) / 2)
77 + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
78 + (copy_base ? 1 : 0)
79 + (writeback? 1 : 0)
80 + (pc_temp ? 1 : 0);
81 } else {
82 numMicroops = ones + (writeback ? 1 : 0);
83 }
84
85 microOps = new StaticInstPtr[numMicroops];
86
87 uint32_t addr = 0;
88
89 if (!up)
90 addr = (ones << 2) - 4;
91
92 if (!index)
93 addr += 4;
94
95 StaticInstPtr *uop = microOps;
96
97 // Add 0 to Rn and stick it in ureg0.
98 // This is equivalent to a move.
99 if (copy_base)
100 *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
101
102 unsigned reg = 0;
103 while (mem_ops != 0) {
104 // Do load operations in pairs if possible
105 if (load && mem_ops >= 2 &&
106 !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
107 // 64-bit memory operation
108 // Find 2 set register bits (clear them after finding)
109 unsigned reg_idx1;
110 unsigned reg_idx2;
111
112 // Find the first register
113 while (!bits(regs, reg)) reg++;
114 replaceBits(regs, reg, 0);
115 reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
116
117 // Find the second register
118 while (!bits(regs, reg)) reg++;
119 replaceBits(regs, reg, 0);
120 reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
121
122 // Load into temp reg if necessary
123 if (reg_idx2 == INTREG_PC && pc_temp)
124 reg_idx2 = INTREG_UREG1;
125
126 // Actually load both registers from memory
127 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
128 copy_base ? INTREG_UREG0 : rn, up, addr);
129
130 if (!writeback && reg_idx2 == INTREG_PC) {
131 // No writeback if idx==pc, set appropriate flags
132 (*uop)->setFlag(StaticInst::IsControl);
133 (*uop)->setFlag(StaticInst::IsIndirectControl);
134
135 if (!(condCode == COND_AL || condCode == COND_UC))
136 (*uop)->setFlag(StaticInst::IsCondControl);
137 else
138 (*uop)->setFlag(StaticInst::IsUncondControl);
139 }
140
141 if (up) addr += 8;
142 else addr -= 8;
143 mem_ops -= 2;
144 } else {
145 // 32-bit memory operation
146 // Find register for operation
147 unsigned reg_idx;
148 while (!bits(regs, reg)) reg++;
149 replaceBits(regs, reg, 0);
150 reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
151
152 if (load) {
153 if (writeback && reg_idx == INTREG_PC) {
154 // If this instruction changes the PC and performs a
155 // writeback, ensure the pc load/branch is the last uop.
156 // Load into a temp reg here.
157 *uop = new MicroLdrUop(machInst, INTREG_UREG1,
158 copy_base ? INTREG_UREG0 : rn, up, addr);
159 } else if (reg_idx == INTREG_PC && exception_ret) {
160 // Special handling for exception return
161 *uop = new MicroLdrRetUop(machInst, reg_idx,
162 copy_base ? INTREG_UREG0 : rn, up, addr);
163 } else {
164 // standard single load uop
165 *uop = new MicroLdrUop(machInst, reg_idx,
166 copy_base ? INTREG_UREG0 : rn, up, addr);
167 }
168
169 // Loading pc as last operation? Set appropriate flags.
170 if (!writeback && reg_idx == INTREG_PC) {
171 (*uop)->setFlag(StaticInst::IsControl);
172 (*uop)->setFlag(StaticInst::IsIndirectControl);
173
174 if (!(condCode == COND_AL || condCode == COND_UC))
175 (*uop)->setFlag(StaticInst::IsCondControl);
176 else
177 (*uop)->setFlag(StaticInst::IsUncondControl);
178 }
179 } else {
180 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
181 }
182
183 if (up) addr += 4;
184 else addr -= 4;
185 --mem_ops;
186 }
187
188 // Load/store micro-op generated, go to next uop
189 ++uop;
190 }
191
192 if (writeback && ones) {
193 // Perform writeback uop operation
194 if (up)
195 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
196 else
197 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
198
199 // If the PC was loaded into a temp, write it after the address writeback.
200 if (pc_temp) {
201 if (exception_ret) {
202 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
203 } else {
204 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
205 }
206 (*uop)->setFlag(StaticInst::IsControl);
207 (*uop)->setFlag(StaticInst::IsIndirectControl);
208
209 if (!(condCode == COND_AL || condCode == COND_UC))
210 (*uop)->setFlag(StaticInst::IsCondControl);
211 else
212 (*uop)->setFlag(StaticInst::IsUncondControl);
213
214 if (rn == INTREG_SP)
215 (*uop)->setFlag(StaticInst::IsReturn);
216
217 ++uop;
218 }
219 }
220
221 --uop;
222 (*uop)->setLastMicroop();
223 microOps[0]->setFirstMicroop();
224
225 /* Take the control flags from the last microop for the macroop */
226 if ((*uop)->isControl())
227 setFlag(StaticInst::IsControl);
228 if ((*uop)->isCondCtrl())
229 setFlag(StaticInst::IsCondControl);
230 if ((*uop)->isUncondCtrl())
231 setFlag(StaticInst::IsUncondControl);
232 if ((*uop)->isIndirectCtrl())
233 setFlag(StaticInst::IsIndirectControl);
234 if ((*uop)->isReturn())
235 setFlag(StaticInst::IsReturn);
236
237 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
238 (*uop)->setDelayedCommit();
239 }
240}
241
242PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
243 uint32_t size, bool fp, bool load, bool noAlloc,
244 bool signExt, bool exclusive, bool acrel,
245 int64_t imm, AddrMode mode,
246 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
247 PredMacroOp(mnem, machInst, __opClass)
248{
249 bool post = (mode == AddrMd_PostIndex);
250 bool writeback = (mode != AddrMd_Offset);
251
252 if (load) {
253 // Integer division rounds up: size-4 and size-8 pairs take one load uop, size-16 pairs take two
254 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
255 } else {
256 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
257 }
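// E.g. a 128-bit (size == 16) FP store pair expands to four store uops
// (bottom and top halves of each register) plus the optional address and
// writeback uops.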
258 microOps = new StaticInstPtr[numMicroops];
259
260 StaticInstPtr *uop = microOps;
261
262 rn = makeSP(rn);
263
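// For offset and pre-indexed forms, compute the (SP-aligned) transfer
// address into ureg0 up front; post-indexed forms access memory at rn
// directly and update it afterwards.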
264 if (!post) {
265 *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
266 post ? 0 : imm);
267 }
268
269 if (fp) {
270 if (size == 16) {
271 if (load) {
272 *uop++ = new MicroLdFp16Uop(machInst, rt,
273 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
274 *uop++ = new MicroLdFp16Uop(machInst, rt2,
275 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
276 } else {
277 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
278 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
280 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
281 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
282 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
284 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
285 }
286 } else if (size == 8) {
287 if (load) {
288 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
289 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
290 } else {
291 *uop++ = new MicroStrFpXImmUop(machInst, rt,
292 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
293 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
294 post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
295 }
296 } else if (size == 4) {
297 if (load) {
298 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
299 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
300 } else {
301 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
302 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
303 }
304 }
305 } else {
306 if (size == 8) {
307 if (load) {
308 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
309 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
310 } else {
311 *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
312 0, noAlloc, exclusive, acrel);
313 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
314 size, noAlloc, exclusive, acrel);
315 }
316 } else if (size == 4) {
317 if (load) {
318 if (signExt) {
319 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
320 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
321 } else {
322 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
323 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
324 }
325 } else {
326 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
327 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
328 }
329 }
330 }
331
332 if (writeback) {
333 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
334 post ? imm : 0);
335 }
336
337 assert(uop == &microOps[numMicroops]);
338 (*--uop)->setLastMicroop();
339 microOps[0]->setFirstMicroop();
340
341 for (StaticInstPtr *curUop = microOps;
342 !(*curUop)->isLastMicroop(); curUop++) {
343 (*curUop)->setDelayedCommit();
344 }
345}
346
347BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
348 OpClass __opClass, bool load, IntRegIndex dest,
349 IntRegIndex base, int64_t imm) :
350 PredMacroOp(mnem, machInst, __opClass)
351{
352 numMicroops = load ? 1 : 2;
353 microOps = new StaticInstPtr[numMicroops];
354
355 StaticInstPtr *uop = microOps;
356
357 if (load) {
358 *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
359 } else {
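// Split the 128-bit store into bottom (QB) and top (QT) half uops.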
360 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
361 (*uop)->setDelayedCommit();
362 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
363 }
364 (*uop)->setLastMicroop();
365 microOps[0]->setFirstMicroop();
366}
367
368BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
369 OpClass __opClass, bool load, IntRegIndex dest,
370 IntRegIndex base, int64_t imm) :
371 PredMacroOp(mnem, machInst, __opClass)
372{
373 numMicroops = load ? 2 : 3;
374 microOps = new StaticInstPtr[numMicroops];
375
376 StaticInstPtr *uop = microOps;
377
378 if (load) {
379 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
380 } else {
381 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
382 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
383 }
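// Post-indexed: the accesses above used the unmodified base; now advance
// it by the immediate.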
384 *uop = new MicroAddXiUop(machInst, base, base, imm);
385 (*uop)->setLastMicroop();
386 microOps[0]->setFirstMicroop();
387
388 for (StaticInstPtr *curUop = microOps;
389 !(*curUop)->isLastMicroop(); curUop++) {
390 (*curUop)->setDelayedCommit();
391 }
392}
393
394BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
395 OpClass __opClass, bool load, IntRegIndex dest,
396 IntRegIndex base, int64_t imm) :
397 PredMacroOp(mnem, machInst, __opClass)
398{
399 numMicroops = load ? 2 : 3;
400 microOps = new StaticInstPtr[numMicroops];
401
402 StaticInstPtr *uop = microOps;
403
404 if (load) {
405 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
406 } else {
407 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
408 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
409 }
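// Pre-indexed: the accesses above already used base + imm; write the same
// updated address back to the base register.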
410 *uop = new MicroAddXiUop(machInst, base, base, imm);
411 (*uop)->setLastMicroop();
412 microOps[0]->setFirstMicroop();
413
414 for (StaticInstPtr *curUop = microOps;
415 !(*curUop)->isLastMicroop(); curUop++) {
416 (*curUop)->setDelayedCommit();
417 }
418}
419
420BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
421 OpClass __opClass, bool load, IntRegIndex dest,
422 IntRegIndex base, IntRegIndex offset,
423 ArmExtendType type, int64_t imm) :
424 PredMacroOp(mnem, machInst, __opClass)
425{
426 numMicroops = load ? 1 : 2;
427 microOps = new StaticInstPtr[numMicroops];
428
429 StaticInstPtr *uop = microOps;
430
431 if (load) {
432 *uop = new MicroLdFp16RegUop(machInst, dest, base,
433 offset, type, imm);
434 } else {
435 *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
436 offset, type, imm);
437 (*uop)->setDelayedCommit();
438 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
439 offset, type, imm);
440 }
441
442 (*uop)->setLastMicroop();
443 microOps[0]->setFirstMicroop();
444}
445
446BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
447 OpClass __opClass, IntRegIndex dest,
448 int64_t imm) :
449 PredMacroOp(mnem, machInst, __opClass)
450{
451 numMicroops = 1;
452 microOps = new StaticInstPtr[numMicroops];
453
454 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
455 microOps[0]->setLastMicroop();
456 microOps[0]->setFirstMicroop();
457}
458
459VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
460 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
461 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
462 PredMacroOp(mnem, machInst, __opClass)
463{
464 assert(regs > 0 && regs <= 4);
465 assert(regs % elems == 0);
466
467 numMicroops = (regs > 2) ? 2 : 1;
468 bool wb = (rm != 15);
469 bool deinterleave = (elems > 1);
470
471 if (wb) numMicroops++;
472 if (deinterleave) numMicroops += (regs / elems);
473 microOps = new StaticInstPtr[numMicroops];
474
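// When deinterleaving is needed, load into scratch registers above the
// architected file first; otherwise load straight into the destination.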
475 RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
476
477 uint32_t noAlign = TLB::MustBeOne;
478
479 unsigned uopIdx = 0;
480 switch (regs) {
481 case 4:
482 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483 size, machInst, rMid, rn, 0, align);
484 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
485 size, machInst, rMid + 4, rn, 16, noAlign);
486 break;
487 case 3:
488 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
489 size, machInst, rMid, rn, 0, align);
490 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
491 size, machInst, rMid + 4, rn, 16, noAlign);
492 break;
493 case 2:
494 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
495 size, machInst, rMid, rn, 0, align);
496 break;
497 case 1:
498 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
499 size, machInst, rMid, rn, 0, align);
500 break;
501 default:
502 // Unknown number of registers
503 microOps[uopIdx++] = new Unknown(machInst);
504 }
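// Writeback amount: rm == 13 encodes an immediate update by the amount
// transferred; any other rm (15 means no writeback at all) adds that
// register to the base.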
505 if (wb) {
506 if (rm != 15 && rm != 13) {
507 microOps[uopIdx++] =
508 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
509 } else {
510 microOps[uopIdx++] =
511 new MicroAddiUop(machInst, rn, rn, regs * 8);
512 }
513 }
514 if (deinterleave) {
515 switch (elems) {
516 case 4:
517 assert(regs == 4);
518 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
519 size, machInst, vd * 2, rMid, inc * 2);
520 break;
521 case 3:
522 assert(regs == 3);
523 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
524 size, machInst, vd * 2, rMid, inc * 2);
525 break;
526 case 2:
527 assert(regs == 4 || regs == 2);
528 if (regs == 4) {
529 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530 size, machInst, vd * 2, rMid, inc * 2);
531 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
532 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
533 } else {
534 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
535 size, machInst, vd * 2, rMid, inc * 2);
536 }
537 break;
538 default:
539 // Bad number of elements to deinterleave
540 microOps[uopIdx++] = new Unknown(machInst);
541 }
542 }
543 assert(uopIdx == numMicroops);
544
545 for (unsigned i = 0; i < numMicroops - 1; i++) {
546 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
547 assert(uopPtr);
548 uopPtr->setDelayedCommit();
549 }
550 microOps[0]->setFirstMicroop();
551 microOps[numMicroops - 1]->setLastMicroop();
552}
553
554VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
555 OpClass __opClass, bool all, unsigned elems,
556 RegIndex rn, RegIndex vd, unsigned regs,
557 unsigned inc, uint32_t size, uint32_t align,
558 RegIndex rm, unsigned lane) :
559 PredMacroOp(mnem, machInst, __opClass)
560{
561 assert(regs > 0 && regs <= 4);
562 assert(regs % elems == 0);
563
564 unsigned eBytes = (1 << size);
565 unsigned loadSize = eBytes * elems;
566 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
567 sizeof(FloatRegBits);
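// loadRegs is the number of 32-bit registers the load touches; it is
// only referenced by the assert below.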
568
569 assert(loadRegs > 0 && loadRegs <= 4);
570
571 numMicroops = 1;
572 bool wb = (rm != 15);
573
574 if (wb) numMicroops++;
575 numMicroops += (regs / elems);
576 microOps = new StaticInstPtr[numMicroops];
577
578 RegIndex ufp0 = NumFloatV7ArchRegs;
579
580 unsigned uopIdx = 0;
581 switch (loadSize) {
582 case 1:
583 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
584 machInst, ufp0, rn, 0, align);
585 break;
586 case 2:
587 if (eBytes == 2) {
588 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
589 machInst, ufp0, rn, 0, align);
590 } else {
591 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
592 machInst, ufp0, rn, 0, align);
593 }
594 break;
595 case 3:
596 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
597 machInst, ufp0, rn, 0, align);
598 break;
599 case 4:
600 switch (eBytes) {
601 case 1:
602 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
603 machInst, ufp0, rn, 0, align);
604 break;
605 case 2:
606 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
607 machInst, ufp0, rn, 0, align);
608 break;
609 case 4:
610 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
611 machInst, ufp0, rn, 0, align);
612 break;
613 }
614 break;
615 case 6:
616 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
617 machInst, ufp0, rn, 0, align);
618 break;
619 case 8:
620 switch (eBytes) {
621 case 2:
622 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
623 machInst, ufp0, rn, 0, align);
624 break;
625 case 4:
626 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
627 machInst, ufp0, rn, 0, align);
628 break;
629 }
630 break;
631 case 12:
632 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
633 machInst, ufp0, rn, 0, align);
634 break;
635 case 16:
636 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
637 machInst, ufp0, rn, 0, align);
638 break;
639 default:
640 // Unrecognized load size
641 microOps[uopIdx++] = new Unknown(machInst);
642 }
643 if (wb) {
644 if (rm != 15 && rm != 13) {
645 microOps[uopIdx++] =
646 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
647 } else {
648 microOps[uopIdx++] =
649 new MicroAddiUop(machInst, rn, rn, loadSize);
650 }
651 }
652 switch (elems) {
653 case 4:
654 assert(regs == 4);
655 switch (size) {
656 case 0:
657 if (all) {
658 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
659 machInst, vd * 2, ufp0, inc * 2);
660 } else {
661 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
662 machInst, vd * 2, ufp0, inc * 2, lane);
663 }
664 break;
665 case 1:
666 if (all) {
667 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
668 machInst, vd * 2, ufp0, inc * 2);
669 } else {
670 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
671 machInst, vd * 2, ufp0, inc * 2, lane);
672 }
673 break;
674 case 2:
675 if (all) {
676 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
677 machInst, vd * 2, ufp0, inc * 2);
678 } else {
679 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
680 machInst, vd * 2, ufp0, inc * 2, lane);
681 }
682 break;
683 default:
684 // Bad size
685 microOps[uopIdx++] = new Unknown(machInst);
686 break;
687 }
688 break;
689 case 3:
690 assert(regs == 3);
691 switch (size) {
692 case 0:
693 if (all) {
694 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
695 machInst, vd * 2, ufp0, inc * 2);
696 } else {
697 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
698 machInst, vd * 2, ufp0, inc * 2, lane);
699 }
700 break;
701 case 1:
702 if (all) {
703 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
704 machInst, vd * 2, ufp0, inc * 2);
705 } else {
706 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
707 machInst, vd * 2, ufp0, inc * 2, lane);
708 }
709 break;
710 case 2:
711 if (all) {
712 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
713 machInst, vd * 2, ufp0, inc * 2);
714 } else {
715 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
716 machInst, vd * 2, ufp0, inc * 2, lane);
717 }
718 break;
719 default:
720 // Bad size
721 microOps[uopIdx++] = new Unknown(machInst);
722 break;
723 }
724 break;
725 case 2:
726 assert(regs == 2);
727 assert(loadRegs <= 2);
728 switch (size) {
729 case 0:
730 if (all) {
731 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
732 machInst, vd * 2, ufp0, inc * 2);
733 } else {
734 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
735 machInst, vd * 2, ufp0, inc * 2, lane);
736 }
737 break;
738 case 1:
739 if (all) {
740 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
741 machInst, vd * 2, ufp0, inc * 2);
742 } else {
743 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
744 machInst, vd * 2, ufp0, inc * 2, lane);
745 }
746 break;
747 case 2:
748 if (all) {
749 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
750 machInst, vd * 2, ufp0, inc * 2);
751 } else {
752 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
753 machInst, vd * 2, ufp0, inc * 2, lane);
754 }
755 break;
756 default:
757 // Bad size
758 microOps[uopIdx++] = new Unknown(machInst);
759 break;
760 }
761 break;
762 case 1:
763 assert(regs == 1 || (all && regs == 2));
764 assert(loadRegs <= 2);
765 for (unsigned offset = 0; offset < regs; offset++) {
766 switch (size) {
767 case 0:
768 if (all) {
769 microOps[uopIdx++] =
770 new MicroUnpackAllNeon2to2Uop<uint8_t>(
771 machInst, (vd + offset) * 2, ufp0, inc * 2);
772 } else {
773 microOps[uopIdx++] =
774 new MicroUnpackNeon2to2Uop<uint8_t>(
775 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
776 }
777 break;
778 case 1:
779 if (all) {
780 microOps[uopIdx++] =
781 new MicroUnpackAllNeon2to2Uop<uint16_t>(
782 machInst, (vd + offset) * 2, ufp0, inc * 2);
783 } else {
784 microOps[uopIdx++] =
785 new MicroUnpackNeon2to2Uop<uint16_t>(
786 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
787 }
788 break;
789 case 2:
790 if (all) {
791 microOps[uopIdx++] =
792 new MicroUnpackAllNeon2to2Uop<uint32_t>(
793 machInst, (vd + offset) * 2, ufp0, inc * 2);
794 } else {
795 microOps[uopIdx++] =
796 new MicroUnpackNeon2to2Uop<uint32_t>(
797 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
798 }
799 break;
800 default:
801 // Bad size
802 microOps[uopIdx++] = new Unknown(machInst);
803 break;
804 }
805 }
806 break;
807 default:
808 // Bad number of elements to unpack
809 microOps[uopIdx++] = new Unknown(machInst);
810 }
811 assert(uopIdx == numMicroops);
812
813 for (unsigned i = 0; i < numMicroops - 1; i++) {
814 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
815 assert(uopPtr);
816 uopPtr->setDelayedCommit();
817 }
818 microOps[0]->setFirstMicroop();
819 microOps[numMicroops - 1]->setLastMicroop();
820}
821
822VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
823 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
824 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
825 PredMacroOp(mnem, machInst, __opClass)
826{
827 assert(regs > 0 && regs <= 4);
828 assert(regs % elems == 0);
829
830 numMicroops = (regs > 2) ? 2 : 1;
831 bool wb = (rm != 15);
832 bool interleave = (elems > 1);
833
834 if (wb) numMicroops++;
835 if (interleave) numMicroops += (regs / elems);
836 microOps = new StaticInstPtr[numMicroops];
837
838 uint32_t noAlign = TLB::MustBeOne;
839
840 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
841
842 unsigned uopIdx = 0;
843 if (interleave) {
844 switch (elems) {
845 case 4:
846 assert(regs == 4);
847 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
848 size, machInst, rMid, vd * 2, inc * 2);
849 break;
850 case 3:
851 assert(regs == 3);
852 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
853 size, machInst, rMid, vd * 2, inc * 2);
854 break;
855 case 2:
856 assert(regs == 4 || regs == 2);
857 if (regs == 4) {
858 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859 size, machInst, rMid, vd * 2, inc * 2);
860 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
861 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
862 } else {
863 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
864 size, machInst, rMid, vd * 2, inc * 2);
865 }
866 break;
867 default:
868 // Bad number of elements to interleave
869 microOps[uopIdx++] = new Unknown(machInst);
870 }
871 }
872 switch (regs) {
873 case 4:
874 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875 size, machInst, rMid, rn, 0, align);
876 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
877 size, machInst, rMid + 4, rn, 16, noAlign);
878 break;
879 case 3:
880 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
881 size, machInst, rMid, rn, 0, align);
882 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
883 size, machInst, rMid + 4, rn, 16, noAlign);
884 break;
885 case 2:
886 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
887 size, machInst, rMid, rn, 0, align);
888 break;
889 case 1:
890 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
891 size, machInst, rMid, rn, 0, align);
892 break;
893 default:
894 // Unknown number of registers
895 microOps[uopIdx++] = new Unknown(machInst);
896 }
897 if (wb) {
898 if (rm != 15 && rm != 13) {
899 microOps[uopIdx++] =
900 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
901 } else {
902 microOps[uopIdx++] =
903 new MicroAddiUop(machInst, rn, rn, regs * 8);
904 }
905 }
906 assert(uopIdx == numMicroops);
907
908 for (unsigned i = 0; i < numMicroops - 1; i++) {
909 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
910 assert(uopPtr);
911 uopPtr->setDelayedCommit();
912 }
913 microOps[0]->setFirstMicroop();
914 microOps[numMicroops - 1]->setLastMicroop();
915}
916
917VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
918 OpClass __opClass, bool all, unsigned elems,
919 RegIndex rn, RegIndex vd, unsigned regs,
920 unsigned inc, uint32_t size, uint32_t align,
921 RegIndex rm, unsigned lane) :
922 PredMacroOp(mnem, machInst, __opClass)
923{
924 assert(!all);
925 assert(regs > 0 && regs <= 4);
926 assert(regs % elems == 0);
927
928 unsigned eBytes = (1 << size);
929 unsigned storeSize = eBytes * elems;
930 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
931 sizeof(FloatRegBits);
932
933 assert(storeRegs > 0 && storeRegs <= 4);
934
935 numMicroops = 1;
936 bool wb = (rm != 15);
937
938 if (wb) numMicroops++;
939 numMicroops += (regs / elems);
940 microOps = new StaticInstPtr[numMicroops];
941
942 RegIndex ufp0 = NumFloatV7ArchRegs;
943
944 unsigned uopIdx = 0;
945 switch (elems) {
946 case 4:
947 assert(regs == 4);
948 switch (size) {
949 case 0:
950 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
951 machInst, ufp0, vd * 2, inc * 2, lane);
952 break;
953 case 1:
954 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
955 machInst, ufp0, vd * 2, inc * 2, lane);
956 break;
957 case 2:
958 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
959 machInst, ufp0, vd * 2, inc * 2, lane);
960 break;
961 default:
962 // Bad size
963 microOps[uopIdx++] = new Unknown(machInst);
964 break;
965 }
966 break;
967 case 3:
968 assert(regs == 3);
969 switch (size) {
970 case 0:
971 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
972 machInst, ufp0, vd * 2, inc * 2, lane);
973 break;
974 case 1:
975 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
976 machInst, ufp0, vd * 2, inc * 2, lane);
977 break;
978 case 2:
979 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
980 machInst, ufp0, vd * 2, inc * 2, lane);
981 break;
982 default:
983 // Bad size
984 microOps[uopIdx++] = new Unknown(machInst);
985 break;
986 }
987 break;
988 case 2:
989 assert(regs == 2);
990 assert(storeRegs <= 2);
991 switch (size) {
992 case 0:
993 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
994 machInst, ufp0, vd * 2, inc * 2, lane);
995 break;
996 case 1:
997 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
998 machInst, ufp0, vd * 2, inc * 2, lane);
999 break;
1000 case 2:
1001 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1002 machInst, ufp0, vd * 2, inc * 2, lane);
1003 break;
1004 default:
1005 // Bad size
1006 microOps[uopIdx++] = new Unknown(machInst);
1007 break;
1008 }
1009 break;
1010 case 1:
1011 assert(regs == 1 || (all && regs == 2));
1012 assert(storeRegs <= 2);
1013 for (unsigned offset = 0; offset < regs; offset++) {
1014 switch (size) {
1015 case 0:
1016 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1017 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1018 break;
1019 case 1:
1020 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1021 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1022 break;
1023 case 2:
1024 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1025 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1026 break;
1027 default:
1028 // Bad size
1029 microOps[uopIdx++] = new Unknown(machInst);
1030 break;
1031 }
1032 }
1033 break;
1034 default:
1035 // Bad number of elements to pack
1036 microOps[uopIdx++] = new Unknown(machInst);
1037 }
1038 switch (storeSize) {
1039 case 1:
1040 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1041 machInst, ufp0, rn, 0, align);
1042 break;
1043 case 2:
1044 if (eBytes == 2) {
1045 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1046 machInst, ufp0, rn, 0, align);
1047 } else {
1048 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1049 machInst, ufp0, rn, 0, align);
1050 }
1051 break;
1052 case 3:
1053 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1054 machInst, ufp0, rn, 0, align);
1055 break;
1056 case 4:
1057 switch (eBytes) {
1058 case 1:
1059 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1060 machInst, ufp0, rn, 0, align);
1061 break;
1062 case 2:
1063 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1064 machInst, ufp0, rn, 0, align);
1065 break;
1066 case 4:
1067 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1068 machInst, ufp0, rn, 0, align);
1069 break;
1070 }
1071 break;
1072 case 6:
1073 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1074 machInst, ufp0, rn, 0, align);
1075 break;
1076 case 8:
1077 switch (eBytes) {
1078 case 2:
1079 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1080 machInst, ufp0, rn, 0, align);
1081 break;
1082 case 4:
1083 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1084 machInst, ufp0, rn, 0, align);
1085 break;
1086 }
1087 break;
1088 case 12:
1089 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1090 machInst, ufp0, rn, 0, align);
1091 break;
1092 case 16:
1093 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1094 machInst, ufp0, rn, 0, align);
1095 break;
1096 default:
1097 // Bad store size
1098 microOps[uopIdx++] = new Unknown(machInst);
1099 }
1100 if (wb) {
1101 if (rm != 15 && rm != 13) {
1102 microOps[uopIdx++] =
1103 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1104 } else {
1105 microOps[uopIdx++] =
1106 new MicroAddiUop(machInst, rn, rn, storeSize);
1107 }
1108 }
1109 assert(uopIdx == numMicroops);
1110
1111 for (unsigned i = 0; i < numMicroops - 1; i++) {
1112 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1113 assert(uopPtr);
1114 uopPtr->setDelayedCommit();
1115 }
1116 microOps[0]->setFirstMicroop();
1117 microOps[numMicroops - 1]->setLastMicroop();
1118}
1119
1120VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1121 OpClass __opClass, RegIndex rn, RegIndex vd,
1122 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1123 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1124 PredMacroOp(mnem, machInst, __opClass)
1125{
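// Memory data is staged in the scratch vector registers starting at vx,
// above the architected file, and then deinterleaved into vd.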
1126 RegIndex vx = NumFloatV8ArchRegs / 4;
1127 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1128 bool baseIsSP = isSP((IntRegIndex) rnsp);
1129
1130 numMicroops = wb ? 1 : 0;
1131
1132 int totNumBytes = numRegs * dataSize / 8;
1133 assert(totNumBytes <= 64);
1134
1135 // The guiding principle here is that no more than 16 bytes can be
1136 // transferred at a time
1137 int numMemMicroops = totNumBytes / 16;
1138 int residuum = totNumBytes % 16;
1139 if (residuum)
1140 ++numMemMicroops;
1141 numMicroops += numMemMicroops;
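// E.g. a 24-byte transfer becomes one 16-byte access plus one 8-byte
// residuum access.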
1142
1143 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1144 numMicroops += numMarshalMicroops;
1145
1146 microOps = new StaticInstPtr[numMicroops];
1147 unsigned uopIdx = 0;
1148 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149 TLB::AllowUnaligned;
1150
1151 int i = 0;
1152 for (; i < numMemMicroops - 1; ++i) {
1153 microOps[uopIdx++] = new MicroNeonLoad64(
1154 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155 baseIsSP, 16 /* accSize */, eSize);
1156 }
1157 microOps[uopIdx++] = new MicroNeonLoad64(
1158 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159 residuum ? residuum : 16 /* accSize */, eSize);
1160
1161 // Writeback microop: the post-increment amount is encoded in "Rm" as
1162 // either a 64-bit general register or '11111', meaning an immediate
1163 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164 if (wb) {
1165 if (rm != ((RegIndex) INTREG_X31)) {
1166 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167 UXTX, 0);
1168 } else {
1169 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170 totNumBytes);
1171 }
1172 }
1173
1174 for (int i = 0; i < numMarshalMicroops; ++i) {
1175 switch(numRegs) {
1176 case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1177 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1178 numStructElems, 1, i /* step */);
1179 break;
1180 case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1181 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1182 numStructElems, 2, i /* step */);
1183 break;
1184 case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1185 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1186 numStructElems, 3, i /* step */);
1187 break;
1188 case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1189 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1190 numStructElems, 4, i /* step */);
1191 break;
1192 default: panic("Invalid number of registers");
1193 }
1194
1195 }
1196
1197 assert(uopIdx == numMicroops);
1198
1199 for (int i = 0; i < numMicroops - 1; ++i) {
1200 microOps[i]->setDelayedCommit();
1201 }
1202 microOps[numMicroops - 1]->setLastMicroop();
1203}
1204
1205VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1206 OpClass __opClass, RegIndex rn, RegIndex vd,
1207 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1208 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1209 PredMacroOp(mnem, machInst, __opClass)
1210{
1211 RegIndex vx = NumFloatV8ArchRegs / 4;
1212 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1213 bool baseIsSP = isSP((IntRegIndex) rnsp);
1214
1215 numMicroops = wb ? 1 : 0;
1216
1217 int totNumBytes = numRegs * dataSize / 8;
1218 assert(totNumBytes <= 64);
1219
1220 // The guiding principle here is that no more than 16 bytes can be
1221 // transferred at a time
1222 int numMemMicroops = totNumBytes / 16;
1223 int residuum = totNumBytes % 16;
1224 if (residuum)
1225 ++numMemMicroops;
1226 numMicroops += numMemMicroops;
1227
1228 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1229 numMicroops += numMarshalMicroops;
1230
1231 microOps = new StaticInstPtr[numMicroops];
1232 unsigned uopIdx = 0;
1233
1234 for (int i = 0; i < numMarshalMicroops; ++i) {
1235 switch (numRegs) {
1236 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1237 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1238 numStructElems, 1, i /* step */);
1239 break;
1240 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1241 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1242 numStructElems, 2, i /* step */);
1243 break;
1244 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1245 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1246 numStructElems, 3, i /* step */);
1247 break;
1248 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1249 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1250 numStructElems, 4, i /* step */);
1251 break;
1252 default: panic("Invalid number of registers");
1253 }
1254 }
1255
1256 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1257 TLB::AllowUnaligned;
1258
1259 int i = 0;
1260 for (; i < numMemMicroops - 1; ++i) {
1261 microOps[uopIdx++] = new MicroNeonStore64(
1262 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1263 baseIsSP, 16 /* accSize */, eSize);
1264 }
1265 microOps[uopIdx++] = new MicroNeonStore64(
1266 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1267 residuum ? residuum : 16 /* accSize */, eSize);
1268
1269 // Writeback microop: the post-increment amount is encoded in "Rm" as
1270 // either a 64-bit general register or '11111', meaning an immediate
1271 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1272 if (wb) {
1273 if (rm != ((RegIndex) INTREG_X31)) {
1274 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1275 UXTX, 0);
1276 } else {
1277 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1278 totNumBytes);
1279 }
1280 }
1281
1282 assert(uopIdx == numMicroops);
1283
1284 for (int i = 0; i < numMicroops - 1; i++) {
1285 microOps[i]->setDelayedCommit();
1286 }
1287 microOps[numMicroops - 1]->setLastMicroop();
1288}
1289
1290VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1291 OpClass __opClass, RegIndex rn, RegIndex vd,
1292 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1293 uint8_t numStructElems, uint8_t index, bool wb,
1294 bool replicate) :
1295 PredMacroOp(mnem, machInst, __opClass),
1296 eSize(0), dataSize(0), numStructElems(0), index(0),
1297 wb(false), replicate(false)
1299{
1300 RegIndex vx = NumFloatV8ArchRegs / 4;
1301 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1302 bool baseIsSP = isSP((IntRegIndex) rnsp);
1303
1304 numMicroops = wb ? 1 : 0;
1305
1306 int eSizeBytes = 1 << eSize;
1307 int totNumBytes = numStructElems * eSizeBytes;
1308 assert(totNumBytes <= 64);
1309
1310 // The guiding principle here is that no more than 16 bytes can be
1311 // transferred at a time
1312 int numMemMicroops = totNumBytes / 16;
1313 int residuum = totNumBytes % 16;
1314 if (residuum)
1315 ++numMemMicroops;
1316 numMicroops += numMemMicroops;
1317
1318 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1319 numMicroops += numMarshalMicroops;
1320
1321 microOps = new StaticInstPtr[numMicroops];
1322 unsigned uopIdx = 0;
1323
1324 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1325 TLB::AllowUnaligned;
1326
1327 int i = 0;
1328 for (; i < numMemMicroops - 1; ++i) {
1329 microOps[uopIdx++] = new MicroNeonLoad64(
1330 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1331 baseIsSP, 16 /* accSize */, eSize);
1332 }
1333 microOps[uopIdx++] = new MicroNeonLoad64(
1334 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1335 residuum ? residuum : 16 /* accSize */, eSize);
1336
1337 // Writeback microop: the post-increment amount is encoded in "Rm" as
1338 // either a 64-bit general register or '11111', meaning an immediate
1339 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1340 if (wb) {
1341 if (rm != ((RegIndex) INTREG_X31)) {
1342 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1343 UXTX, 0);
1344 } else {
1345 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1346 totNumBytes);
1347 }
1348 }
1349
1350 for (int i = 0; i < numMarshalMicroops; ++i) {
1351 microOps[uopIdx++] = new MicroUnpackNeon64(
1352 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1353 numStructElems, index, i /* step */, replicate);
1354 }
1355
1356 assert(uopIdx == numMicroops);
1357
1358 for (int i = 0; i < numMicroops - 1; i++) {
1359 microOps[i]->setDelayedCommit();
1360 }
1361 microOps[numMicroops - 1]->setLastMicroop();
1362}
1363
1364VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1365 OpClass __opClass, RegIndex rn, RegIndex vd,
1366 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1367 uint8_t numStructElems, uint8_t index, bool wb,
1368 bool replicate) :
1369 PredMacroOp(mnem, machInst, __opClass),
1370 eSize(0), dataSize(0), numStructElems(0), index(0),
1371 wb(false), replicate(false)
1372{
1373 RegIndex vx = NumFloatV8ArchRegs / 4;
1374 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1375 bool baseIsSP = isSP((IntRegIndex) rnsp);
1376
1377 numMicroops = wb ? 1 : 0;
1378
1379 int eSizeBytes = 1 << eSize;
1380 int totNumBytes = numStructElems * eSizeBytes;
1381 assert(totNumBytes <= 64);
1382
1383 // The guiding principle here is that no more than 16 bytes can be
1384 // transferred at a time
1385 int numMemMicroops = totNumBytes / 16;
1386 int residuum = totNumBytes % 16;
1387 if (residuum)
1388 ++numMemMicroops;
1389 numMicroops += numMemMicroops;
1390
1391 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1392 numMicroops += numMarshalMicroops;
1393
1394 microOps = new StaticInstPtr[numMicroops];
1395 unsigned uopIdx = 0;
1396
1397 for (int i = 0; i < numMarshalMicroops; ++i) {
1398 microOps[uopIdx++] = new MicroPackNeon64(
1399 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1400 numStructElems, index, i /* step */, replicate);
1401 }
1402
1403 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1404 TLB::AllowUnaligned;
1405
1406 int i = 0;
1407 for (; i < numMemMicroops - 1; ++i) {
1408 microOps[uopIdx++] = new MicroNeonStore64(
1409 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1410 baseIsSP, 16 /* accSize */, eSize);
1411 }
1412 microOps[uopIdx++] = new MicroNeonStore64(
1413 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1414 residuum ? residuum : 16 /* accSize */, eSize);
1415
1416 // Writeback microop: the post-increment amount is encoded in "Rm" as
1417 // either a 64-bit general register or '11111', meaning an immediate
1418 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1419 if (wb) {
1420 if (rm != ((RegIndex) INTREG_X31)) {
1421 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1422 UXTX, 0);
1423 } else {
1424 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1425 totNumBytes);
1426 }
1427 }
1428
1429 assert(uopIdx == numMicroops);
1430
1431 for (int i = 0; i < numMicroops - 1; i++) {
1432 microOps[i]->setDelayedCommit();
1433 }
1434 microOps[numMicroops - 1]->setLastMicroop();
1435}
1436
1437MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1438 OpClass __opClass, IntRegIndex rn,
1439 RegIndex vd, bool single, bool up,
1440 bool writeback, bool load, uint32_t offset) :
1441 PredMacroOp(mnem, machInst, __opClass)
1442{
1443 int i = 0;
1444
1445 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446 // to be functionally identical except that fldmx is deprecated. For now
1447 // we'll assume they're otherwise interchangeable.
1448 int count = (single ? offset : (offset / 2));
1449 if (count == 0 || count > NumFloatV7ArchRegs)
1450 warn_once("Bad offset field for VFP load/store multiple.\n");
1451 if (count == 0) {
1452 // Force there to be at least one microop so the macroop makes sense.
1453 writeback = true;
1454 }
1455 if (count > NumFloatV7ArchRegs)
1456 count = NumFloatV7ArchRegs;
1457
1458 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1459 microOps = new StaticInstPtr[numMicroops];
1460
1461 int64_t addr = 0;
1462
1463 if (!up)
1464 addr = 4 * offset;
1465
1466 bool tempUp = up;
1467 for (int j = 0; j < count; j++) {
1468 if (load) {
1469 if (single) {
1470 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1471 tempUp, addr);
1472 } else {
1473 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1474 tempUp, addr);
1475 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1476 addr + (up ? 4 : -4));
1477 }
1478 } else {
1479 if (single) {
1480 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1481 tempUp, addr);
1482 } else {
1483 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1484 tempUp, addr);
1485 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1486 addr + (up ? 4 : -4));
1487 }
1488 }
1489 if (!tempUp) {
1490 addr -= (single ? 4 : 8);
1491 // The microops don't handle negative displacement, so once we
1492 // hit zero, flip the polarity and start adding.
1493 if (addr <= 0) {
1494 tempUp = true;
1495 addr = -addr;
1496 }
1497 } else {
1498 addr += (single ? 4 : 8);
1499 }
1500 }
1501
1502 if (writeback) {
1503 if (up) {
1504 microOps[i++] =
1505 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1506 } else {
1507 microOps[i++] =
1508 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1509 }
1510 }
1511
1512 assert(numMicroops == i);
1513 microOps[numMicroops - 1]->setLastMicroop();
1514
1515 for (StaticInstPtr *curUop = microOps;
1516 !(*curUop)->isLastMicroop(); curUop++) {
1517 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1518 assert(uopPtr);
1519 uopPtr->setDelayedCommit();
1520 }
1521}
1522
1523std::string
1524MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1525{
1526 std::stringstream ss;
1527 printMnemonic(ss);
1528 printReg(ss, ura);
1529 ss << ", ";
1530 printReg(ss, urb);
1531 ss << ", ";
1532 ccprintf(ss, "#%d", imm);
1533 return ss.str();
1534}
1535
1536std::string
1537MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1538{
1539 std::stringstream ss;
1540 printMnemonic(ss);
1541 printReg(ss, ura);
1542 ss << ", ";
1543 printReg(ss, urb);
1544 ss << ", ";
1545 ccprintf(ss, "#%d", imm);
1546 return ss.str();
1547}
1548
1549std::string
1550MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1551{
1552 std::stringstream ss;
1553 printMnemonic(ss);
1554 ss << "[PC,CPSR]";
1555 return ss.str();
1556}
1557
1558std::string
1559MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1560{
1561 std::stringstream ss;
1562 printMnemonic(ss);
1563 printReg(ss, ura);
1564 ccprintf(ss, ", ");
1565 printReg(ss, urb);
1566 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1567 return ss.str();
1568}
1569
1570std::string
1571MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572{
1573 std::stringstream ss;
1574 printMnemonic(ss);
1575 printReg(ss, ura);
1576 ss << ", ";
1577 printReg(ss, urb);
1578 return ss.str();
1579}
1580
1581std::string
1582MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1583{
1584 std::stringstream ss;
1585 printMnemonic(ss);
1586 printReg(ss, ura);
1587 ss << ", ";
1588 printReg(ss, urb);
1589 ss << ", ";
1590 printReg(ss, urc);
1591 return ss.str();
1592}
1593
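// Renders the uop as "<mnemonic> reg, [base, #imm]"; floating point
// destinations are printed from the FP register file.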
1594std::string
1595MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1596{
1597 std::stringstream ss;
1598 printMnemonic(ss);
1599 if (isFloating())
1600 printReg(ss, ura + FP_Reg_Base);
1601 else
1602 printReg(ss, ura);
1603 ss << ", [";
1604 printReg(ss, urb);
1605 ss << ", ";
1606 ccprintf(ss, "#%d", imm);
1607 ss << "]";
1608 return ss.str();
1609}
1610
1611std::string
1612MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1613{
1614 std::stringstream ss;
1615 printMnemonic(ss);
1616 printReg(ss, dest);
1617 ss << ", ";
1618 printReg(ss, dest2);
1619 ss << ", [";
1620 printReg(ss, urb);
1621 ss << ", ";
1622 ccprintf(ss, "#%d", imm);
1623 ss << "]";
1624 return ss.str();
1625}
1626
1627}