Deleted Added
sdiff udiff text old ( 10180:e40b35147270 ) new ( 10199:6cf40d777682 )
full compact
1/*
2 * Copyright (c) 2010-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include <sstream>
44
45#include "arch/arm/insts/macromem.hh"
46
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57 OpClass __opClass, IntRegIndex rn,
58 bool index, bool up, bool user, bool writeback,
59 bool load, uint32_t reglist) :
60 PredMacroOp(mnem, machInst, __opClass)
61{
62 uint32_t regs = reglist;
63 uint32_t ones = number_of_ones(reglist);
64 // Remember that writeback adds a uop or two and the temp register adds one
65 numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;
66
67 // It's technically legal to do a lot of nothing
68 if (!ones)
69 numMicroops = 1;
70
71 microOps = new StaticInstPtr[numMicroops];
72 uint32_t addr = 0;
73
74 if (!up)
75 addr = (ones << 2) - 4;
76
77 if (!index)
78 addr += 4;
79
80 StaticInstPtr *uop = microOps;
81
82 // Add 0 to Rn and stick it in ureg0.
83 // This is equivalent to a move.
84 *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
85
86 unsigned reg = 0;
87 unsigned regIdx = 0;
88 bool force_user = user & !bits(reglist, 15);
89 bool exception_ret = user & bits(reglist, 15);
90
91 for (int i = 0; i < ones; i++) {
92 // Find the next register.
93 while (!bits(regs, reg))
94 reg++;
95 replaceBits(regs, reg, 0);
96
97 regIdx = reg;
98 if (force_user) {
99 regIdx = intRegInMode(MODE_USER, regIdx);
100 }
101
102 if (load) {
103 if (writeback && i == ones - 1) {
104 // If it's a writeback and this is the last register
105 // do the load into a temporary register which we'll move
106 // into the final one later
107 *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
108 up, addr);
109 } else {
110 // Otherwise just do it normally
111 if (reg == INTREG_PC && exception_ret) {
112 // This must be the exception return form of ldm.
113 *++uop = new MicroLdrRetUop(machInst, regIdx,
114 INTREG_UREG0, up, addr);
115 if (!(condCode == COND_AL || condCode == COND_UC))
116 (*uop)->setFlag(StaticInst::IsCondControl);
117 else
118 (*uop)->setFlag(StaticInst::IsUncondControl);
119 } else {
120 *++uop = new MicroLdrUop(machInst, regIdx,
121 INTREG_UREG0, up, addr);
122 if (reg == INTREG_PC) {
123 (*uop)->setFlag(StaticInst::IsControl);
124 if (!(condCode == COND_AL || condCode == COND_UC))
125 (*uop)->setFlag(StaticInst::IsCondControl);
126 else
127 (*uop)->setFlag(StaticInst::IsUncondControl);
128 (*uop)->setFlag(StaticInst::IsIndirectControl);
129 }
130 }
131 }
132 } else {
133 *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
134 }
135
136 if (up)
137 addr += 4;
138 else
139 addr -= 4;
140 }
141
142 if (writeback && ones) {
143 // put the register update after we're done all loading
144 if (up)
145 *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
146 else
147 *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);
148
149 // If this was a load move the last temporary value into place
150 // this way we can't take an exception after we update the base
151 // register.
152 if (load && reg == INTREG_PC && exception_ret) {
153 *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
154 if (!(condCode == COND_AL || condCode == COND_UC))
155 (*uop)->setFlag(StaticInst::IsCondControl);
156 else
157 (*uop)->setFlag(StaticInst::IsUncondControl);
158 } else if (load) {
159 *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
160 if (reg == INTREG_PC) {
161 (*uop)->setFlag(StaticInst::IsControl);
162 (*uop)->setFlag(StaticInst::IsCondControl);
163 (*uop)->setFlag(StaticInst::IsIndirectControl);
164 // This is created as a RAS POP
165 if (rn == INTREG_SP)
166 (*uop)->setFlag(StaticInst::IsReturn);
167
168 }
169 }
170 }
171
172 (*uop)->setLastMicroop();
173
174 for (StaticInstPtr *curUop = microOps;
175 !(*curUop)->isLastMicroop(); curUop++) {
176 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
177 assert(uopPtr);
178 uopPtr->setDelayedCommit();
179 }
180}
181
182PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
183 uint32_t size, bool fp, bool load, bool noAlloc,
184 bool signExt, bool exclusive, bool acrel,
185 int64_t imm, AddrMode mode,
186 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
187 PredMacroOp(mnem, machInst, __opClass)
188{
189 bool writeback = (mode != AddrMd_Offset);
190 numMicroops = 1 + (size / 4) + (writeback ? 1 : 0);
191 microOps = new StaticInstPtr[numMicroops];
192
193 StaticInstPtr *uop = microOps;
194
195 bool post = (mode == AddrMd_PostIndex);
196
197 rn = makeSP(rn);
198
199 *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm);
200
201 if (fp) {
202 if (size == 16) {
203 if (load) {
204 *++uop = new MicroLdrQBFpXImmUop(machInst, rt,
205 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
206 *++uop = new MicroLdrQTFpXImmUop(machInst, rt,
207 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
208 *++uop = new MicroLdrQBFpXImmUop(machInst, rt2,
209 INTREG_UREG0, 16, noAlloc, exclusive, acrel);
210 *++uop = new MicroLdrQTFpXImmUop(machInst, rt2,
211 INTREG_UREG0, 16, noAlloc, exclusive, acrel);
212 } else {
213 *++uop = new MicroStrQBFpXImmUop(machInst, rt,
214 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
215 *++uop = new MicroStrQTFpXImmUop(machInst, rt,
216 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
217 *++uop = new MicroStrQBFpXImmUop(machInst, rt2,
218 INTREG_UREG0, 16, noAlloc, exclusive, acrel);
219 *++uop = new MicroStrQTFpXImmUop(machInst, rt2,
220 INTREG_UREG0, 16, noAlloc, exclusive, acrel);
221 }
222 } else if (size == 8) {
223 if (load) {
224 *++uop = new MicroLdrFpXImmUop(machInst, rt,
225 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
226 *++uop = new MicroLdrFpXImmUop(machInst, rt2,
227 INTREG_UREG0, 8, noAlloc, exclusive, acrel);
228 } else {
229 *++uop = new MicroStrFpXImmUop(machInst, rt,
230 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
231 *++uop = new MicroStrFpXImmUop(machInst, rt2,
232 INTREG_UREG0, 8, noAlloc, exclusive, acrel);
233 }
234 } else if (size == 4) {
235 if (load) {
236 *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2,
237 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
238 } else {
239 *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2,
240 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
241 }
242 }
243 } else {
244 if (size == 8) {
245 if (load) {
246 *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0,
247 0, noAlloc, exclusive, acrel);
248 *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0,
249 size, noAlloc, exclusive, acrel);
250 } else {
251 *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0,
252 0, noAlloc, exclusive, acrel);
253 *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0,
254 size, noAlloc, exclusive, acrel);
255 }
256 } else if (size == 4) {
257 if (load) {
258 if (signExt) {
259 *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2,
260 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
261 } else {
262 *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2,
263 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
264 }
265 } else {
266 *++uop = new MicroStrDXImmUop(machInst, rt, rt2,
267 INTREG_UREG0, 0, noAlloc, exclusive, acrel);
268 }
269 }
270 }
271
272 if (writeback) {
273 *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0,
274 post ? imm : 0);
275 }
276
277 (*uop)->setLastMicroop();
278
279 for (StaticInstPtr *curUop = microOps;
280 !(*curUop)->isLastMicroop(); curUop++) {
281 (*curUop)->setDelayedCommit();
282 }
283}
284
285BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
286 OpClass __opClass, bool load, IntRegIndex dest,
287 IntRegIndex base, int64_t imm) :
288 PredMacroOp(mnem, machInst, __opClass)
289{
290 numMicroops = 2;
291 microOps = new StaticInstPtr[numMicroops];
292
293 if (load) {
294 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
295 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
296 } else {
297 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
298 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
299 }
300 microOps[0]->setDelayedCommit();
301 microOps[1]->setLastMicroop();
302}
303
304BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
305 OpClass __opClass, bool load, IntRegIndex dest,
306 IntRegIndex base, int64_t imm) :
307 PredMacroOp(mnem, machInst, __opClass)
308{
309 numMicroops = 3;
310 microOps = new StaticInstPtr[numMicroops];
311
312 if (load) {
313 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0);
314 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0);
315 } else {
316 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
317 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
318 }
319 microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
320
321 microOps[0]->setDelayedCommit();
322 microOps[1]->setDelayedCommit();
323 microOps[2]->setLastMicroop();
324}
325
326BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
327 OpClass __opClass, bool load, IntRegIndex dest,
328 IntRegIndex base, int64_t imm) :
329 PredMacroOp(mnem, machInst, __opClass)
330{
331 numMicroops = 3;
332 microOps = new StaticInstPtr[numMicroops];
333
334 if (load) {
335 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
336 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
337 } else {
338 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
339 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
340 }
341 microOps[2] = new MicroAddXiUop(machInst, base, base, imm);
342
343 microOps[0]->setDelayedCommit();
344 microOps[1]->setDelayedCommit();
345 microOps[2]->setLastMicroop();
346}
347
348BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
349 OpClass __opClass, bool load, IntRegIndex dest,
350 IntRegIndex base, IntRegIndex offset,
351 ArmExtendType type, int64_t imm) :
352 PredMacroOp(mnem, machInst, __opClass)
353{
354 numMicroops = 2;
355 microOps = new StaticInstPtr[numMicroops];
356
357 if (load) {
358 microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base,
359 offset, type, imm);
360 microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base,
361 offset, type, imm);
362 } else {
363 microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base,
364 offset, type, imm);
365 microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base,
366 offset, type, imm);
367 }
368
369 microOps[0]->setDelayedCommit();
370 microOps[1]->setLastMicroop();
371}
372
373BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
374 OpClass __opClass, IntRegIndex dest,
375 int64_t imm) :
376 PredMacroOp(mnem, machInst, __opClass)
377{
378 numMicroops = 2;
379 microOps = new StaticInstPtr[numMicroops];
380
381 microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm);
382 microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm);
383
384 microOps[0]->setDelayedCommit();
385 microOps[1]->setLastMicroop();
386}
387
388VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
389 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
390 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
391 PredMacroOp(mnem, machInst, __opClass)
392{
393 assert(regs > 0 && regs <= 4);
394 assert(regs % elems == 0);
395
396 numMicroops = (regs > 2) ? 2 : 1;
397 bool wb = (rm != 15);
398 bool deinterleave = (elems > 1);
399
400 if (wb) numMicroops++;
401 if (deinterleave) numMicroops += (regs / elems);
402 microOps = new StaticInstPtr[numMicroops];
403
404 RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
405
406 uint32_t noAlign = TLB::MustBeOne;
407
408 unsigned uopIdx = 0;
409 switch (regs) {
410 case 4:
411 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
412 size, machInst, rMid, rn, 0, align);
413 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
414 size, machInst, rMid + 4, rn, 16, noAlign);
415 break;
416 case 3:
417 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
418 size, machInst, rMid, rn, 0, align);
419 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
420 size, machInst, rMid + 4, rn, 16, noAlign);
421 break;
422 case 2:
423 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
424 size, machInst, rMid, rn, 0, align);
425 break;
426 case 1:
427 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
428 size, machInst, rMid, rn, 0, align);
429 break;
430 default:
431 // Unknown number of registers
432 microOps[uopIdx++] = new Unknown(machInst);
433 }
434 if (wb) {
435 if (rm != 15 && rm != 13) {
436 microOps[uopIdx++] =
437 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
438 } else {
439 microOps[uopIdx++] =
440 new MicroAddiUop(machInst, rn, rn, regs * 8);
441 }
442 }
443 if (deinterleave) {
444 switch (elems) {
445 case 4:
446 assert(regs == 4);
447 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
448 size, machInst, vd * 2, rMid, inc * 2);
449 break;
450 case 3:
451 assert(regs == 3);
452 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
453 size, machInst, vd * 2, rMid, inc * 2);
454 break;
455 case 2:
456 assert(regs == 4 || regs == 2);
457 if (regs == 4) {
458 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
459 size, machInst, vd * 2, rMid, inc * 2);
460 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
461 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
462 } else {
463 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
464 size, machInst, vd * 2, rMid, inc * 2);
465 }
466 break;
467 default:
468 // Bad number of elements to deinterleave
469 microOps[uopIdx++] = new Unknown(machInst);
470 }
471 }
472 assert(uopIdx == numMicroops);
473
474 for (unsigned i = 0; i < numMicroops - 1; i++) {
475 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
476 assert(uopPtr);
477 uopPtr->setDelayedCommit();
478 }
479 microOps[numMicroops - 1]->setLastMicroop();
480}
481
482VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
483 OpClass __opClass, bool all, unsigned elems,
484 RegIndex rn, RegIndex vd, unsigned regs,
485 unsigned inc, uint32_t size, uint32_t align,
486 RegIndex rm, unsigned lane) :
487 PredMacroOp(mnem, machInst, __opClass)
488{
489 assert(regs > 0 && regs <= 4);
490 assert(regs % elems == 0);
491
492 unsigned eBytes = (1 << size);
493 unsigned loadSize = eBytes * elems;
494 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
495 sizeof(FloatRegBits);
496
497 assert(loadRegs > 0 && loadRegs <= 4);
498
499 numMicroops = 1;
500 bool wb = (rm != 15);
501
502 if (wb) numMicroops++;
503 numMicroops += (regs / elems);
504 microOps = new StaticInstPtr[numMicroops];
505
506 RegIndex ufp0 = NumFloatV7ArchRegs;
507
508 unsigned uopIdx = 0;
509 switch (loadSize) {
510 case 1:
511 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
512 machInst, ufp0, rn, 0, align);
513 break;
514 case 2:
515 if (eBytes == 2) {
516 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
517 machInst, ufp0, rn, 0, align);
518 } else {
519 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
520 machInst, ufp0, rn, 0, align);
521 }
522 break;
523 case 3:
524 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
525 machInst, ufp0, rn, 0, align);
526 break;
527 case 4:
528 switch (eBytes) {
529 case 1:
530 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
531 machInst, ufp0, rn, 0, align);
532 break;
533 case 2:
534 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
535 machInst, ufp0, rn, 0, align);
536 break;
537 case 4:
538 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
539 machInst, ufp0, rn, 0, align);
540 break;
541 }
542 break;
543 case 6:
544 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
545 machInst, ufp0, rn, 0, align);
546 break;
547 case 8:
548 switch (eBytes) {
549 case 2:
550 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
551 machInst, ufp0, rn, 0, align);
552 break;
553 case 4:
554 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
555 machInst, ufp0, rn, 0, align);
556 break;
557 }
558 break;
559 case 12:
560 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
561 machInst, ufp0, rn, 0, align);
562 break;
563 case 16:
564 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
565 machInst, ufp0, rn, 0, align);
566 break;
567 default:
568 // Unrecognized load size
569 microOps[uopIdx++] = new Unknown(machInst);
570 }
571 if (wb) {
572 if (rm != 15 && rm != 13) {
573 microOps[uopIdx++] =
574 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
575 } else {
576 microOps[uopIdx++] =
577 new MicroAddiUop(machInst, rn, rn, loadSize);
578 }
579 }
580 switch (elems) {
581 case 4:
582 assert(regs == 4);
583 switch (size) {
584 case 0:
585 if (all) {
586 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
587 machInst, vd * 2, ufp0, inc * 2);
588 } else {
589 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
590 machInst, vd * 2, ufp0, inc * 2, lane);
591 }
592 break;
593 case 1:
594 if (all) {
595 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
596 machInst, vd * 2, ufp0, inc * 2);
597 } else {
598 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
599 machInst, vd * 2, ufp0, inc * 2, lane);
600 }
601 break;
602 case 2:
603 if (all) {
604 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
605 machInst, vd * 2, ufp0, inc * 2);
606 } else {
607 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
608 machInst, vd * 2, ufp0, inc * 2, lane);
609 }
610 break;
611 default:
612 // Bad size
613 microOps[uopIdx++] = new Unknown(machInst);
614 break;
615 }
616 break;
617 case 3:
618 assert(regs == 3);
619 switch (size) {
620 case 0:
621 if (all) {
622 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
623 machInst, vd * 2, ufp0, inc * 2);
624 } else {
625 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
626 machInst, vd * 2, ufp0, inc * 2, lane);
627 }
628 break;
629 case 1:
630 if (all) {
631 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
632 machInst, vd * 2, ufp0, inc * 2);
633 } else {
634 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
635 machInst, vd * 2, ufp0, inc * 2, lane);
636 }
637 break;
638 case 2:
639 if (all) {
640 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
641 machInst, vd * 2, ufp0, inc * 2);
642 } else {
643 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
644 machInst, vd * 2, ufp0, inc * 2, lane);
645 }
646 break;
647 default:
648 // Bad size
649 microOps[uopIdx++] = new Unknown(machInst);
650 break;
651 }
652 break;
653 case 2:
654 assert(regs == 2);
655 assert(loadRegs <= 2);
656 switch (size) {
657 case 0:
658 if (all) {
659 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
660 machInst, vd * 2, ufp0, inc * 2);
661 } else {
662 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
663 machInst, vd * 2, ufp0, inc * 2, lane);
664 }
665 break;
666 case 1:
667 if (all) {
668 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
669 machInst, vd * 2, ufp0, inc * 2);
670 } else {
671 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
672 machInst, vd * 2, ufp0, inc * 2, lane);
673 }
674 break;
675 case 2:
676 if (all) {
677 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
678 machInst, vd * 2, ufp0, inc * 2);
679 } else {
680 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
681 machInst, vd * 2, ufp0, inc * 2, lane);
682 }
683 break;
684 default:
685 // Bad size
686 microOps[uopIdx++] = new Unknown(machInst);
687 break;
688 }
689 break;
690 case 1:
691 assert(regs == 1 || (all && regs == 2));
692 assert(loadRegs <= 2);
693 for (unsigned offset = 0; offset < regs; offset++) {
694 switch (size) {
695 case 0:
696 if (all) {
697 microOps[uopIdx++] =
698 new MicroUnpackAllNeon2to2Uop<uint8_t>(
699 machInst, (vd + offset) * 2, ufp0, inc * 2);
700 } else {
701 microOps[uopIdx++] =
702 new MicroUnpackNeon2to2Uop<uint8_t>(
703 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
704 }
705 break;
706 case 1:
707 if (all) {
708 microOps[uopIdx++] =
709 new MicroUnpackAllNeon2to2Uop<uint16_t>(
710 machInst, (vd + offset) * 2, ufp0, inc * 2);
711 } else {
712 microOps[uopIdx++] =
713 new MicroUnpackNeon2to2Uop<uint16_t>(
714 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
715 }
716 break;
717 case 2:
718 if (all) {
719 microOps[uopIdx++] =
720 new MicroUnpackAllNeon2to2Uop<uint32_t>(
721 machInst, (vd + offset) * 2, ufp0, inc * 2);
722 } else {
723 microOps[uopIdx++] =
724 new MicroUnpackNeon2to2Uop<uint32_t>(
725 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
726 }
727 break;
728 default:
729 // Bad size
730 microOps[uopIdx++] = new Unknown(machInst);
731 break;
732 }
733 }
734 break;
735 default:
736 // Bad number of elements to unpack
737 microOps[uopIdx++] = new Unknown(machInst);
738 }
739 assert(uopIdx == numMicroops);
740
741 for (unsigned i = 0; i < numMicroops - 1; i++) {
742 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
743 assert(uopPtr);
744 uopPtr->setDelayedCommit();
745 }
746 microOps[numMicroops - 1]->setLastMicroop();
747}
748
749VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
750 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
751 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
752 PredMacroOp(mnem, machInst, __opClass)
753{
754 assert(regs > 0 && regs <= 4);
755 assert(regs % elems == 0);
756
757 numMicroops = (regs > 2) ? 2 : 1;
758 bool wb = (rm != 15);
759 bool interleave = (elems > 1);
760
761 if (wb) numMicroops++;
762 if (interleave) numMicroops += (regs / elems);
763 microOps = new StaticInstPtr[numMicroops];
764
765 uint32_t noAlign = TLB::MustBeOne;
766
767 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
768
769 unsigned uopIdx = 0;
770 if (interleave) {
771 switch (elems) {
772 case 4:
773 assert(regs == 4);
774 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
775 size, machInst, rMid, vd * 2, inc * 2);
776 break;
777 case 3:
778 assert(regs == 3);
779 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
780 size, machInst, rMid, vd * 2, inc * 2);
781 break;
782 case 2:
783 assert(regs == 4 || regs == 2);
784 if (regs == 4) {
785 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
786 size, machInst, rMid, vd * 2, inc * 2);
787 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
788 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
789 } else {
790 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
791 size, machInst, rMid, vd * 2, inc * 2);
792 }
793 break;
794 default:
795 // Bad number of elements to interleave
796 microOps[uopIdx++] = new Unknown(machInst);
797 }
798 }
799 switch (regs) {
800 case 4:
801 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
802 size, machInst, rMid, rn, 0, align);
803 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
804 size, machInst, rMid + 4, rn, 16, noAlign);
805 break;
806 case 3:
807 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
808 size, machInst, rMid, rn, 0, align);
809 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
810 size, machInst, rMid + 4, rn, 16, noAlign);
811 break;
812 case 2:
813 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
814 size, machInst, rMid, rn, 0, align);
815 break;
816 case 1:
817 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
818 size, machInst, rMid, rn, 0, align);
819 break;
820 default:
821 // Unknown number of registers
822 microOps[uopIdx++] = new Unknown(machInst);
823 }
824 if (wb) {
825 if (rm != 15 && rm != 13) {
826 microOps[uopIdx++] =
827 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
828 } else {
829 microOps[uopIdx++] =
830 new MicroAddiUop(machInst, rn, rn, regs * 8);
831 }
832 }
833 assert(uopIdx == numMicroops);
834
835 for (unsigned i = 0; i < numMicroops - 1; i++) {
836 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
837 assert(uopPtr);
838 uopPtr->setDelayedCommit();
839 }
840 microOps[numMicroops - 1]->setLastMicroop();
841}
842
843VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
844 OpClass __opClass, bool all, unsigned elems,
845 RegIndex rn, RegIndex vd, unsigned regs,
846 unsigned inc, uint32_t size, uint32_t align,
847 RegIndex rm, unsigned lane) :
848 PredMacroOp(mnem, machInst, __opClass)
849{
850 assert(!all);
851 assert(regs > 0 && regs <= 4);
852 assert(regs % elems == 0);
853
854 unsigned eBytes = (1 << size);
855 unsigned storeSize = eBytes * elems;
856 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
857 sizeof(FloatRegBits);
858
859 assert(storeRegs > 0 && storeRegs <= 4);
860
861 numMicroops = 1;
862 bool wb = (rm != 15);
863
864 if (wb) numMicroops++;
865 numMicroops += (regs / elems);
866 microOps = new StaticInstPtr[numMicroops];
867
868 RegIndex ufp0 = NumFloatV7ArchRegs;
869
870 unsigned uopIdx = 0;
871 switch (elems) {
872 case 4:
873 assert(regs == 4);
874 switch (size) {
875 case 0:
876 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
877 machInst, ufp0, vd * 2, inc * 2, lane);
878 break;
879 case 1:
880 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
881 machInst, ufp0, vd * 2, inc * 2, lane);
882 break;
883 case 2:
884 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
885 machInst, ufp0, vd * 2, inc * 2, lane);
886 break;
887 default:
888 // Bad size
889 microOps[uopIdx++] = new Unknown(machInst);
890 break;
891 }
892 break;
893 case 3:
894 assert(regs == 3);
895 switch (size) {
896 case 0:
897 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
898 machInst, ufp0, vd * 2, inc * 2, lane);
899 break;
900 case 1:
901 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
902 machInst, ufp0, vd * 2, inc * 2, lane);
903 break;
904 case 2:
905 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
906 machInst, ufp0, vd * 2, inc * 2, lane);
907 break;
908 default:
909 // Bad size
910 microOps[uopIdx++] = new Unknown(machInst);
911 break;
912 }
913 break;
914 case 2:
915 assert(regs == 2);
916 assert(storeRegs <= 2);
917 switch (size) {
918 case 0:
919 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
920 machInst, ufp0, vd * 2, inc * 2, lane);
921 break;
922 case 1:
923 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
924 machInst, ufp0, vd * 2, inc * 2, lane);
925 break;
926 case 2:
927 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
928 machInst, ufp0, vd * 2, inc * 2, lane);
929 break;
930 default:
931 // Bad size
932 microOps[uopIdx++] = new Unknown(machInst);
933 break;
934 }
935 break;
936 case 1:
937 assert(regs == 1 || (all && regs == 2));
938 assert(storeRegs <= 2);
939 for (unsigned offset = 0; offset < regs; offset++) {
940 switch (size) {
941 case 0:
942 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
943 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
944 break;
945 case 1:
946 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
947 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
948 break;
949 case 2:
950 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
951 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
952 break;
953 default:
954 // Bad size
955 microOps[uopIdx++] = new Unknown(machInst);
956 break;
957 }
958 }
959 break;
960 default:
961 // Bad number of elements to unpack
962 microOps[uopIdx++] = new Unknown(machInst);
963 }
964 switch (storeSize) {
965 case 1:
966 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
967 machInst, ufp0, rn, 0, align);
968 break;
969 case 2:
970 if (eBytes == 2) {
971 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
972 machInst, ufp0, rn, 0, align);
973 } else {
974 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
975 machInst, ufp0, rn, 0, align);
976 }
977 break;
978 case 3:
979 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
980 machInst, ufp0, rn, 0, align);
981 break;
982 case 4:
983 switch (eBytes) {
984 case 1:
985 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
986 machInst, ufp0, rn, 0, align);
987 break;
988 case 2:
989 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
990 machInst, ufp0, rn, 0, align);
991 break;
992 case 4:
993 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
994 machInst, ufp0, rn, 0, align);
995 break;
996 }
997 break;
998 case 6:
999 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1000 machInst, ufp0, rn, 0, align);
1001 break;
1002 case 8:
1003 switch (eBytes) {
1004 case 2:
1005 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1006 machInst, ufp0, rn, 0, align);
1007 break;
1008 case 4:
1009 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1010 machInst, ufp0, rn, 0, align);
1011 break;
1012 }
1013 break;
1014 case 12:
1015 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1016 machInst, ufp0, rn, 0, align);
1017 break;
1018 case 16:
1019 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1020 machInst, ufp0, rn, 0, align);
1021 break;
1022 default:
1023 // Bad store size
1024 microOps[uopIdx++] = new Unknown(machInst);
1025 }
1026 if (wb) {
1027 if (rm != 15 && rm != 13) {
1028 microOps[uopIdx++] =
1029 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1030 } else {
1031 microOps[uopIdx++] =
1032 new MicroAddiUop(machInst, rn, rn, storeSize);
1033 }
1034 }
1035 assert(uopIdx == numMicroops);
1036
1037 for (unsigned i = 0; i < numMicroops - 1; i++) {
1038 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1039 assert(uopPtr);
1040 uopPtr->setDelayedCommit();
1041 }
1042 microOps[numMicroops - 1]->setLastMicroop();
1043}
1044
1045VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1046 OpClass __opClass, RegIndex rn, RegIndex vd,
1047 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1048 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1049 PredMacroOp(mnem, machInst, __opClass)
1050{
1051 RegIndex vx = NumFloatV8ArchRegs / 4;
1052 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1053 bool baseIsSP = isSP((IntRegIndex) rnsp);
1054
1055 numMicroops = wb ? 1 : 0;
1056
1057 int totNumBytes = numRegs * dataSize / 8;
1058 assert(totNumBytes <= 64);
1059
1060 // The guiding principle here is that no more than 16 bytes can be
1061 // transferred at a time
1062 int numMemMicroops = totNumBytes / 16;
1063 int residuum = totNumBytes % 16;
1064 if (residuum)
1065 ++numMemMicroops;
1066 numMicroops += numMemMicroops;
1067
1068 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1069 numMicroops += numMarshalMicroops;
1070
1071 microOps = new StaticInstPtr[numMicroops];
1072 unsigned uopIdx = 0;
1073 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1074 TLB::AllowUnaligned;
1075
1076 int i = 0;
1077 for(; i < numMemMicroops - 1; ++i) {
1078 microOps[uopIdx++] = new MicroNeonLoad64(
1079 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1080 baseIsSP, 16 /* accSize */, eSize);
1081 }
1082 microOps[uopIdx++] = new MicroNeonLoad64(
1083 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1084 residuum ? residuum : 16 /* accSize */, eSize);
1085
1086 // Writeback microop: the post-increment amount is encoded in "Rm": a
1087 // 64-bit general register OR as '11111' for an immediate value equal to
1088 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1089 if (wb) {
1090 if (rm != ((RegIndex) INTREG_X31)) {
1091 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1092 UXTX, 0);
1093 } else {
1094 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1095 totNumBytes);
1096 }
1097 }
1098
1099 for (int i = 0; i < numMarshalMicroops; ++i) {
1100 microOps[uopIdx++] = new MicroDeintNeon64(
1101 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1102 numStructElems, numRegs, i /* step */);
1103 }
1104
1105 assert(uopIdx == numMicroops);
1106
1107 for (int i = 0; i < numMicroops - 1; ++i) {
1108 microOps[i]->setDelayedCommit();
1109 }
1110 microOps[numMicroops - 1]->setLastMicroop();
1111}
1112
1113VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1114 OpClass __opClass, RegIndex rn, RegIndex vd,
1115 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1116 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1117 PredMacroOp(mnem, machInst, __opClass)
1118{
1119 RegIndex vx = NumFloatV8ArchRegs / 4;
1120 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1121 bool baseIsSP = isSP((IntRegIndex) rnsp);
1122
1123 numMicroops = wb ? 1 : 0;
1124
1125 int totNumBytes = numRegs * dataSize / 8;
1126 assert(totNumBytes <= 64);
1127
1128 // The guiding principle here is that no more than 16 bytes can be
1129 // transferred at a time
1130 int numMemMicroops = totNumBytes / 16;
1131 int residuum = totNumBytes % 16;
1132 if (residuum)
1133 ++numMemMicroops;
1134 numMicroops += numMemMicroops;
1135
1136 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1137 numMicroops += numMarshalMicroops;
1138
1139 microOps = new StaticInstPtr[numMicroops];
1140 unsigned uopIdx = 0;
1141
1142 for(int i = 0; i < numMarshalMicroops; ++i) {
1143 microOps[uopIdx++] = new MicroIntNeon64(
1144 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1145 numStructElems, numRegs, i /* step */);
1146 }
1147
1148 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149 TLB::AllowUnaligned;
1150
1151 int i = 0;
1152 for(; i < numMemMicroops - 1; ++i) {
1153 microOps[uopIdx++] = new MicroNeonStore64(
1154 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155 baseIsSP, 16 /* accSize */, eSize);
1156 }
1157 microOps[uopIdx++] = new MicroNeonStore64(
1158 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159 residuum ? residuum : 16 /* accSize */, eSize);
1160
1161 // Writeback microop: the post-increment amount is encoded in "Rm": a
1162 // 64-bit general register OR as '11111' for an immediate value equal to
1163 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164 if (wb) {
1165 if (rm != ((RegIndex) INTREG_X31)) {
1166 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167 UXTX, 0);
1168 } else {
1169 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170 totNumBytes);
1171 }
1172 }
1173
1174 assert(uopIdx == numMicroops);
1175
1176 for (int i = 0; i < numMicroops - 1; i++) {
1177 microOps[i]->setDelayedCommit();
1178 }
1179 microOps[numMicroops - 1]->setLastMicroop();
1180}
1181
1182VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1183 OpClass __opClass, RegIndex rn, RegIndex vd,
1184 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1185 uint8_t numStructElems, uint8_t index, bool wb,
1186 bool replicate) :
1187 PredMacroOp(mnem, machInst, __opClass)
1188{
1189 RegIndex vx = NumFloatV8ArchRegs / 4;
1190 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1191 bool baseIsSP = isSP((IntRegIndex) rnsp);
1192
1193 numMicroops = wb ? 1 : 0;
1194
1195 int eSizeBytes = 1 << eSize;
1196 int totNumBytes = numStructElems * eSizeBytes;
1197 assert(totNumBytes <= 64);
1198
1199 // The guiding principle here is that no more than 16 bytes can be
1200 // transferred at a time
1201 int numMemMicroops = totNumBytes / 16;
1202 int residuum = totNumBytes % 16;
1203 if (residuum)
1204 ++numMemMicroops;
1205 numMicroops += numMemMicroops;
1206
1207 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1208 numMicroops += numMarshalMicroops;
1209
1210 microOps = new StaticInstPtr[numMicroops];
1211 unsigned uopIdx = 0;
1212
1213 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1214 TLB::AllowUnaligned;
1215
1216 int i = 0;
1217 for (; i < numMemMicroops - 1; ++i) {
1218 microOps[uopIdx++] = new MicroNeonLoad64(
1219 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1220 baseIsSP, 16 /* accSize */, eSize);
1221 }
1222 microOps[uopIdx++] = new MicroNeonLoad64(
1223 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1224 residuum ? residuum : 16 /* accSize */, eSize);
1225
1226 // Writeback microop: the post-increment amount is encoded in "Rm": a
1227 // 64-bit general register OR as '11111' for an immediate value equal to
1228 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1229 if (wb) {
1230 if (rm != ((RegIndex) INTREG_X31)) {
1231 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1232 UXTX, 0);
1233 } else {
1234 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1235 totNumBytes);
1236 }
1237 }
1238
1239 for(int i = 0; i < numMarshalMicroops; ++i) {
1240 microOps[uopIdx++] = new MicroUnpackNeon64(
1241 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1242 numStructElems, index, i /* step */, replicate);
1243 }
1244
1245 assert(uopIdx == numMicroops);
1246
1247 for (int i = 0; i < numMicroops - 1; i++) {
1248 microOps[i]->setDelayedCommit();
1249 }
1250 microOps[numMicroops - 1]->setLastMicroop();
1251}
1252
1253VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1254 OpClass __opClass, RegIndex rn, RegIndex vd,
1255 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1256 uint8_t numStructElems, uint8_t index, bool wb,
1257 bool replicate) :
1258 PredMacroOp(mnem, machInst, __opClass)
1259{
1260 RegIndex vx = NumFloatV8ArchRegs / 4;
1261 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1262 bool baseIsSP = isSP((IntRegIndex) rnsp);
1263
1264 numMicroops = wb ? 1 : 0;
1265
1266 int eSizeBytes = 1 << eSize;
1267 int totNumBytes = numStructElems * eSizeBytes;
1268 assert(totNumBytes <= 64);
1269
1270 // The guiding principle here is that no more than 16 bytes can be
1271 // transferred at a time
1272 int numMemMicroops = totNumBytes / 16;
1273 int residuum = totNumBytes % 16;
1274 if (residuum)
1275 ++numMemMicroops;
1276 numMicroops += numMemMicroops;
1277
1278 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1279 numMicroops += numMarshalMicroops;
1280
1281 microOps = new StaticInstPtr[numMicroops];
1282 unsigned uopIdx = 0;
1283
1284 for(int i = 0; i < numMarshalMicroops; ++i) {
1285 microOps[uopIdx++] = new MicroPackNeon64(
1286 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1287 numStructElems, index, i /* step */, replicate);
1288 }
1289
1290 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1291 TLB::AllowUnaligned;
1292
1293 int i = 0;
1294 for(; i < numMemMicroops - 1; ++i) {
1295 microOps[uopIdx++] = new MicroNeonStore64(
1296 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1297 baseIsSP, 16 /* accsize */, eSize);
1298 }
1299 microOps[uopIdx++] = new MicroNeonStore64(
1300 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1301 residuum ? residuum : 16 /* accSize */, eSize);
1302
1303 // Writeback microop: the post-increment amount is encoded in "Rm": a
1304 // 64-bit general register OR as '11111' for an immediate value equal to
1305 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1306 if (wb) {
1307 if (rm != ((RegIndex) INTREG_X31)) {
1308 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1309 UXTX, 0);
1310 } else {
1311 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1312 totNumBytes);
1313 }
1314 }
1315
1316 assert(uopIdx == numMicroops);
1317
1318 for (int i = 0; i < numMicroops - 1; i++) {
1319 microOps[i]->setDelayedCommit();
1320 }
1321 microOps[numMicroops - 1]->setLastMicroop();
1322}
1323
1324MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1325 OpClass __opClass, IntRegIndex rn,
1326 RegIndex vd, bool single, bool up,
1327 bool writeback, bool load, uint32_t offset) :
1328 PredMacroOp(mnem, machInst, __opClass)
1329{
1330 int i = 0;
1331
1332 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1333 // to be functionally identical except that fldmx is deprecated. For now
1334 // we'll assume they're otherwise interchangable.
1335 int count = (single ? offset : (offset / 2));
1336 if (count == 0 || count > NumFloatV7ArchRegs)
1337 warn_once("Bad offset field for VFP load/store multiple.\n");
1338 if (count == 0) {
1339 // Force there to be at least one microop so the macroop makes sense.
1340 writeback = true;
1341 }
1342 if (count > NumFloatV7ArchRegs)
1343 count = NumFloatV7ArchRegs;
1344
1345 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1346 microOps = new StaticInstPtr[numMicroops];
1347
1348 int64_t addr = 0;
1349
1350 if (!up)
1351 addr = 4 * offset;
1352
1353 bool tempUp = up;
1354 for (int j = 0; j < count; j++) {
1355 if (load) {
1356 if (single) {
1357 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1358 tempUp, addr);
1359 } else {
1360 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1361 tempUp, addr);
1362 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1363 addr + (up ? 4 : -4));
1364 }
1365 } else {
1366 if (single) {
1367 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1368 tempUp, addr);
1369 } else {
1370 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1371 tempUp, addr);
1372 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1373 addr + (up ? 4 : -4));
1374 }
1375 }
1376 if (!tempUp) {
1377 addr -= (single ? 4 : 8);
1378 // The microops don't handle negative displacement, so turn if we
1379 // hit zero, flip polarity and start adding.
1380 if (addr <= 0) {
1381 tempUp = true;
1382 addr = -addr;
1383 }
1384 } else {
1385 addr += (single ? 4 : 8);
1386 }
1387 }
1388
1389 if (writeback) {
1390 if (up) {
1391 microOps[i++] =
1392 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1393 } else {
1394 microOps[i++] =
1395 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1396 }
1397 }
1398
1399 assert(numMicroops == i);
1400 microOps[numMicroops - 1]->setLastMicroop();
1401
1402 for (StaticInstPtr *curUop = microOps;
1403 !(*curUop)->isLastMicroop(); curUop++) {
1404 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1405 assert(uopPtr);
1406 uopPtr->setDelayedCommit();
1407 }
1408}
1409
1410std::string
1411MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1412{
1413 std::stringstream ss;
1414 printMnemonic(ss);
1415 printReg(ss, ura);
1416 ss << ", ";
1417 printReg(ss, urb);
1418 ss << ", ";
1419 ccprintf(ss, "#%d", imm);
1420 return ss.str();
1421}
1422
1423std::string
1424MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1425{
1426 std::stringstream ss;
1427 printMnemonic(ss);
1428 printReg(ss, ura);
1429 ss << ", ";
1430 printReg(ss, urb);
1431 ss << ", ";
1432 ccprintf(ss, "#%d", imm);
1433 return ss.str();
1434}
1435
1436std::string
1437MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1438{
1439 std::stringstream ss;
1440 printMnemonic(ss);
1441 ss << "[PC,CPSR]";
1442 return ss.str();
1443}
1444
1445std::string
1446MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1447{
1448 std::stringstream ss;
1449 printMnemonic(ss);
1450 printReg(ss, ura);
1451 ccprintf(ss, ", ");
1452 printReg(ss, urb);
1453 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1454 return ss.str();
1455}
1456
1457std::string
1458MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1459{
1460 std::stringstream ss;
1461 printMnemonic(ss);
1462 printReg(ss, ura);
1463 ss << ", ";
1464 printReg(ss, urb);
1465 return ss.str();
1466}
1467
1468std::string
1469MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1470{
1471 std::stringstream ss;
1472 printMnemonic(ss);
1473 printReg(ss, ura);
1474 ss << ", ";
1475 printReg(ss, urb);
1476 ss << ", ";
1477 printReg(ss, urc);
1478 return ss.str();
1479}
1480
1481std::string
1482MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1483{
1484 std::stringstream ss;
1485 printMnemonic(ss);
1486 if (isFloating())
1487 printReg(ss, ura + FP_Reg_Base);
1488 else
1489 printReg(ss, ura);
1490 ss << ", [";
1491 printReg(ss, urb);
1492 ss << ", ";
1493 ccprintf(ss, "#%d", imm);
1494 ss << "]";
1495 return ss.str();
1496}
1497
1498}