macromem.cc (7646:a444dbee8c07) macromem.cc (7853:69aae4379062)
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include "arch/arm/insts/macromem.hh"
44#include "arch/arm/decoder.hh"
1/*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43#include "arch/arm/insts/macromem.hh"
44#include "arch/arm/decoder.hh"
45#include <sstream>
45
46
47using namespace std;
46using namespace ArmISAInst;
47
48namespace ArmISA
49{
50
51MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
52 OpClass __opClass, IntRegIndex rn,
53 bool index, bool up, bool user, bool writeback,
54 bool load, uint32_t reglist) :
55 PredMacroOp(mnem, machInst, __opClass)
56{
57 uint32_t regs = reglist;
58 uint32_t ones = number_of_ones(reglist);
59 // Remember that writeback adds a uop
60 numMicroops = ones + (writeback ? 1 : 0) + 1;
61 microOps = new StaticInstPtr[numMicroops];
62 uint32_t addr = 0;
63
64 if (!up)
65 addr = (ones << 2) - 4;
66
67 if (!index)
68 addr += 4;
69
70 StaticInstPtr *uop = microOps;
71 StaticInstPtr wbUop;
72 if (writeback) {
73 if (up) {
74 wbUop = new MicroAddiUop(machInst, rn, rn, ones * 4);
75 } else {
76 wbUop = new MicroSubiUop(machInst, rn, rn, ones * 4);
77 }
78 }
79
80 // Add 0 to Rn and stick it in ureg0.
81 // This is equivalent to a move.
82 *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
83
84 // Write back at the start for loads. This covers the ldm exception return
85 // case where the base needs to be written in the old mode. Stores may need
86 // the original value of the base, but they don't change mode and can
87 // write back at the end like before.
88 if (load && writeback) {
89 *++uop = wbUop;
90 }
91
92 unsigned reg = 0;
93 bool force_user = user & !bits(reglist, 15);
94 bool exception_ret = user & bits(reglist, 15);
95
96 for (int i = 0; i < ones; i++) {
97 // Find the next register.
98 while (!bits(regs, reg))
99 reg++;
100 replaceBits(regs, reg, 0);
101
102 unsigned regIdx = reg;
103 if (force_user) {
104 regIdx = intRegInMode(MODE_USER, regIdx);
105 }
106
107 if (load) {
108 if (reg == INTREG_PC && exception_ret) {
109 // This must be the exception return form of ldm.
110 *++uop = new MicroLdrRetUop(machInst, regIdx,
111 INTREG_UREG0, up, addr);
112 } else {
113 *++uop = new MicroLdrUop(machInst, regIdx,
114 INTREG_UREG0, up, addr);
115 }
116 } else {
117 *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
118 }
119
120 if (up)
121 addr += 4;
122 else
123 addr -= 4;
124 }
125
126 if (!load && writeback) {
127 *++uop = wbUop;
128 }
129
130 (*uop)->setLastMicroop();
131
132 for (StaticInstPtr *curUop = microOps;
133 !(*curUop)->isLastMicroop(); curUop++) {
134 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
135 assert(uopPtr);
136 uopPtr->setDelayedCommit();
137 }
138}
139
140VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
141 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
142 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
143 PredMacroOp(mnem, machInst, __opClass)
144{
145 assert(regs > 0 && regs <= 4);
146 assert(regs % elems == 0);
147
148 numMicroops = (regs > 2) ? 2 : 1;
149 bool wb = (rm != 15);
150 bool deinterleave = (elems > 1);
151
152 if (wb) numMicroops++;
153 if (deinterleave) numMicroops += (regs / elems);
154 microOps = new StaticInstPtr[numMicroops];
155
156 RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;
157
158 uint32_t noAlign = TLB::MustBeOne;
159
160 unsigned uopIdx = 0;
161 switch (regs) {
162 case 4:
163 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
164 size, machInst, rMid, rn, 0, align);
165 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
166 size, machInst, rMid + 4, rn, 16, noAlign);
167 break;
168 case 3:
169 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
170 size, machInst, rMid, rn, 0, align);
171 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
172 size, machInst, rMid + 4, rn, 16, noAlign);
173 break;
174 case 2:
175 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
176 size, machInst, rMid, rn, 0, align);
177 break;
178 case 1:
179 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
180 size, machInst, rMid, rn, 0, align);
181 break;
182 default:
48using namespace ArmISAInst;
49
50namespace ArmISA
51{
52
53MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
54 OpClass __opClass, IntRegIndex rn,
55 bool index, bool up, bool user, bool writeback,
56 bool load, uint32_t reglist) :
57 PredMacroOp(mnem, machInst, __opClass)
58{
59 uint32_t regs = reglist;
60 uint32_t ones = number_of_ones(reglist);
61 // Remember that writeback adds a uop
62 numMicroops = ones + (writeback ? 1 : 0) + 1;
63 microOps = new StaticInstPtr[numMicroops];
64 uint32_t addr = 0;
65
66 if (!up)
67 addr = (ones << 2) - 4;
68
69 if (!index)
70 addr += 4;
71
72 StaticInstPtr *uop = microOps;
73 StaticInstPtr wbUop;
74 if (writeback) {
75 if (up) {
76 wbUop = new MicroAddiUop(machInst, rn, rn, ones * 4);
77 } else {
78 wbUop = new MicroSubiUop(machInst, rn, rn, ones * 4);
79 }
80 }
81
82 // Add 0 to Rn and stick it in ureg0.
83 // This is equivalent to a move.
84 *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
85
86 // Write back at the start for loads. This covers the ldm exception return
87 // case where the base needs to be written in the old mode. Stores may need
88 // the original value of the base, but they don't change mode and can
89 // write back at the end like before.
90 if (load && writeback) {
91 *++uop = wbUop;
92 }
93
94 unsigned reg = 0;
95 bool force_user = user & !bits(reglist, 15);
96 bool exception_ret = user & bits(reglist, 15);
97
98 for (int i = 0; i < ones; i++) {
99 // Find the next register.
100 while (!bits(regs, reg))
101 reg++;
102 replaceBits(regs, reg, 0);
103
104 unsigned regIdx = reg;
105 if (force_user) {
106 regIdx = intRegInMode(MODE_USER, regIdx);
107 }
108
109 if (load) {
110 if (reg == INTREG_PC && exception_ret) {
111 // This must be the exception return form of ldm.
112 *++uop = new MicroLdrRetUop(machInst, regIdx,
113 INTREG_UREG0, up, addr);
114 } else {
115 *++uop = new MicroLdrUop(machInst, regIdx,
116 INTREG_UREG0, up, addr);
117 }
118 } else {
119 *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
120 }
121
122 if (up)
123 addr += 4;
124 else
125 addr -= 4;
126 }
127
128 if (!load && writeback) {
129 *++uop = wbUop;
130 }
131
132 (*uop)->setLastMicroop();
133
134 for (StaticInstPtr *curUop = microOps;
135 !(*curUop)->isLastMicroop(); curUop++) {
136 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
137 assert(uopPtr);
138 uopPtr->setDelayedCommit();
139 }
140}
141
142VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
143 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
144 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
145 PredMacroOp(mnem, machInst, __opClass)
146{
147 assert(regs > 0 && regs <= 4);
148 assert(regs % elems == 0);
149
150 numMicroops = (regs > 2) ? 2 : 1;
151 bool wb = (rm != 15);
152 bool deinterleave = (elems > 1);
153
154 if (wb) numMicroops++;
155 if (deinterleave) numMicroops += (regs / elems);
156 microOps = new StaticInstPtr[numMicroops];
157
158 RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;
159
160 uint32_t noAlign = TLB::MustBeOne;
161
162 unsigned uopIdx = 0;
163 switch (regs) {
164 case 4:
165 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
166 size, machInst, rMid, rn, 0, align);
167 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
168 size, machInst, rMid + 4, rn, 16, noAlign);
169 break;
170 case 3:
171 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
172 size, machInst, rMid, rn, 0, align);
173 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
174 size, machInst, rMid + 4, rn, 16, noAlign);
175 break;
176 case 2:
177 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
178 size, machInst, rMid, rn, 0, align);
179 break;
180 case 1:
181 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
182 size, machInst, rMid, rn, 0, align);
183 break;
184 default:
183 panic("Unrecognized number of registers %d.\n", regs);
185 // Unknown number of registers
186 microOps[uopIdx++] = new Unknown(machInst);
184 }
185 if (wb) {
186 if (rm != 15 && rm != 13) {
187 microOps[uopIdx++] =
188 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
189 } else {
190 microOps[uopIdx++] =
191 new MicroAddiUop(machInst, rn, rn, regs * 8);
192 }
193 }
194 if (deinterleave) {
195 switch (elems) {
196 case 4:
197 assert(regs == 4);
198 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
199 size, machInst, vd * 2, rMid, inc * 2);
200 break;
201 case 3:
202 assert(regs == 3);
203 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
204 size, machInst, vd * 2, rMid, inc * 2);
205 break;
206 case 2:
207 assert(regs == 4 || regs == 2);
208 if (regs == 4) {
209 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
210 size, machInst, vd * 2, rMid, inc * 2);
211 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
212 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
213 } else {
214 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
215 size, machInst, vd * 2, rMid, inc * 2);
216 }
217 break;
218 default:
187 }
188 if (wb) {
189 if (rm != 15 && rm != 13) {
190 microOps[uopIdx++] =
191 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
192 } else {
193 microOps[uopIdx++] =
194 new MicroAddiUop(machInst, rn, rn, regs * 8);
195 }
196 }
197 if (deinterleave) {
198 switch (elems) {
199 case 4:
200 assert(regs == 4);
201 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
202 size, machInst, vd * 2, rMid, inc * 2);
203 break;
204 case 3:
205 assert(regs == 3);
206 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
207 size, machInst, vd * 2, rMid, inc * 2);
208 break;
209 case 2:
210 assert(regs == 4 || regs == 2);
211 if (regs == 4) {
212 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
213 size, machInst, vd * 2, rMid, inc * 2);
214 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
215 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
216 } else {
217 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
218 size, machInst, vd * 2, rMid, inc * 2);
219 }
220 break;
221 default:
219 panic("Bad number of elements to deinterleave %d.\n", elems);
222 // Bad number of elements to deinterleave
223 microOps[uopIdx++] = new Unknown(machInst);
220 }
221 }
222 assert(uopIdx == numMicroops);
223
224 for (unsigned i = 0; i < numMicroops - 1; i++) {
225 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
226 assert(uopPtr);
227 uopPtr->setDelayedCommit();
228 }
229 microOps[numMicroops - 1]->setLastMicroop();
230}
231
232VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
233 OpClass __opClass, bool all, unsigned elems,
234 RegIndex rn, RegIndex vd, unsigned regs,
235 unsigned inc, uint32_t size, uint32_t align,
236 RegIndex rm, unsigned lane) :
237 PredMacroOp(mnem, machInst, __opClass)
238{
239 assert(regs > 0 && regs <= 4);
240 assert(regs % elems == 0);
241
242 unsigned eBytes = (1 << size);
243 unsigned loadSize = eBytes * elems;
244 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
245 sizeof(FloatRegBits);
246
247 assert(loadRegs > 0 && loadRegs <= 4);
248
249 numMicroops = 1;
250 bool wb = (rm != 15);
251
252 if (wb) numMicroops++;
253 numMicroops += (regs / elems);
254 microOps = new StaticInstPtr[numMicroops];
255
256 RegIndex ufp0 = NumFloatArchRegs;
257
258 unsigned uopIdx = 0;
259 switch (loadSize) {
260 case 1:
261 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
262 machInst, ufp0, rn, 0, align);
263 break;
264 case 2:
265 if (eBytes == 2) {
266 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
267 machInst, ufp0, rn, 0, align);
268 } else {
269 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
270 machInst, ufp0, rn, 0, align);
271 }
272 break;
273 case 3:
274 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
275 machInst, ufp0, rn, 0, align);
276 break;
277 case 4:
278 switch (eBytes) {
279 case 1:
280 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
281 machInst, ufp0, rn, 0, align);
282 break;
283 case 2:
284 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
285 machInst, ufp0, rn, 0, align);
286 break;
287 case 4:
288 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
289 machInst, ufp0, rn, 0, align);
290 break;
291 }
292 break;
293 case 6:
294 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
295 machInst, ufp0, rn, 0, align);
296 break;
297 case 8:
298 switch (eBytes) {
299 case 2:
300 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
301 machInst, ufp0, rn, 0, align);
302 break;
303 case 4:
304 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
305 machInst, ufp0, rn, 0, align);
306 break;
307 }
308 break;
309 case 12:
310 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
311 machInst, ufp0, rn, 0, align);
312 break;
313 case 16:
314 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
315 machInst, ufp0, rn, 0, align);
316 break;
317 default:
224 }
225 }
226 assert(uopIdx == numMicroops);
227
228 for (unsigned i = 0; i < numMicroops - 1; i++) {
229 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
230 assert(uopPtr);
231 uopPtr->setDelayedCommit();
232 }
233 microOps[numMicroops - 1]->setLastMicroop();
234}
235
236VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
237 OpClass __opClass, bool all, unsigned elems,
238 RegIndex rn, RegIndex vd, unsigned regs,
239 unsigned inc, uint32_t size, uint32_t align,
240 RegIndex rm, unsigned lane) :
241 PredMacroOp(mnem, machInst, __opClass)
242{
243 assert(regs > 0 && regs <= 4);
244 assert(regs % elems == 0);
245
246 unsigned eBytes = (1 << size);
247 unsigned loadSize = eBytes * elems;
248 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
249 sizeof(FloatRegBits);
250
251 assert(loadRegs > 0 && loadRegs <= 4);
252
253 numMicroops = 1;
254 bool wb = (rm != 15);
255
256 if (wb) numMicroops++;
257 numMicroops += (regs / elems);
258 microOps = new StaticInstPtr[numMicroops];
259
260 RegIndex ufp0 = NumFloatArchRegs;
261
262 unsigned uopIdx = 0;
263 switch (loadSize) {
264 case 1:
265 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
266 machInst, ufp0, rn, 0, align);
267 break;
268 case 2:
269 if (eBytes == 2) {
270 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
271 machInst, ufp0, rn, 0, align);
272 } else {
273 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
274 machInst, ufp0, rn, 0, align);
275 }
276 break;
277 case 3:
278 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
279 machInst, ufp0, rn, 0, align);
280 break;
281 case 4:
282 switch (eBytes) {
283 case 1:
284 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
285 machInst, ufp0, rn, 0, align);
286 break;
287 case 2:
288 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
289 machInst, ufp0, rn, 0, align);
290 break;
291 case 4:
292 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
293 machInst, ufp0, rn, 0, align);
294 break;
295 }
296 break;
297 case 6:
298 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
299 machInst, ufp0, rn, 0, align);
300 break;
301 case 8:
302 switch (eBytes) {
303 case 2:
304 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
305 machInst, ufp0, rn, 0, align);
306 break;
307 case 4:
308 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
309 machInst, ufp0, rn, 0, align);
310 break;
311 }
312 break;
313 case 12:
314 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
315 machInst, ufp0, rn, 0, align);
316 break;
317 case 16:
318 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
319 machInst, ufp0, rn, 0, align);
320 break;
321 default:
318 panic("Unrecognized load size %d.\n", regs);
322 // Unrecognized load size
323 microOps[uopIdx++] = new Unknown(machInst);
319 }
320 if (wb) {
321 if (rm != 15 && rm != 13) {
322 microOps[uopIdx++] =
323 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
324 } else {
325 microOps[uopIdx++] =
326 new MicroAddiUop(machInst, rn, rn, loadSize);
327 }
328 }
329 switch (elems) {
330 case 4:
331 assert(regs == 4);
332 switch (size) {
333 case 0:
334 if (all) {
335 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
336 machInst, vd * 2, ufp0, inc * 2);
337 } else {
338 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
339 machInst, vd * 2, ufp0, inc * 2, lane);
340 }
341 break;
342 case 1:
343 if (all) {
344 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
345 machInst, vd * 2, ufp0, inc * 2);
346 } else {
347 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
348 machInst, vd * 2, ufp0, inc * 2, lane);
349 }
350 break;
351 case 2:
352 if (all) {
353 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
354 machInst, vd * 2, ufp0, inc * 2);
355 } else {
356 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
357 machInst, vd * 2, ufp0, inc * 2, lane);
358 }
359 break;
360 default:
324 }
325 if (wb) {
326 if (rm != 15 && rm != 13) {
327 microOps[uopIdx++] =
328 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
329 } else {
330 microOps[uopIdx++] =
331 new MicroAddiUop(machInst, rn, rn, loadSize);
332 }
333 }
334 switch (elems) {
335 case 4:
336 assert(regs == 4);
337 switch (size) {
338 case 0:
339 if (all) {
340 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
341 machInst, vd * 2, ufp0, inc * 2);
342 } else {
343 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
344 machInst, vd * 2, ufp0, inc * 2, lane);
345 }
346 break;
347 case 1:
348 if (all) {
349 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
350 machInst, vd * 2, ufp0, inc * 2);
351 } else {
352 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
353 machInst, vd * 2, ufp0, inc * 2, lane);
354 }
355 break;
356 case 2:
357 if (all) {
358 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
359 machInst, vd * 2, ufp0, inc * 2);
360 } else {
361 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
362 machInst, vd * 2, ufp0, inc * 2, lane);
363 }
364 break;
365 default:
361 panic("Bad size %d.\n", size);
366 // Bad size
367 microOps[uopIdx++] = new Unknown(machInst);
362 break;
363 }
364 break;
365 case 3:
366 assert(regs == 3);
367 switch (size) {
368 case 0:
369 if (all) {
370 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
371 machInst, vd * 2, ufp0, inc * 2);
372 } else {
373 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
374 machInst, vd * 2, ufp0, inc * 2, lane);
375 }
376 break;
377 case 1:
378 if (all) {
379 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
380 machInst, vd * 2, ufp0, inc * 2);
381 } else {
382 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
383 machInst, vd * 2, ufp0, inc * 2, lane);
384 }
385 break;
386 case 2:
387 if (all) {
388 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
389 machInst, vd * 2, ufp0, inc * 2);
390 } else {
391 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
392 machInst, vd * 2, ufp0, inc * 2, lane);
393 }
394 break;
395 default:
368 break;
369 }
370 break;
371 case 3:
372 assert(regs == 3);
373 switch (size) {
374 case 0:
375 if (all) {
376 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
377 machInst, vd * 2, ufp0, inc * 2);
378 } else {
379 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
380 machInst, vd * 2, ufp0, inc * 2, lane);
381 }
382 break;
383 case 1:
384 if (all) {
385 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
386 machInst, vd * 2, ufp0, inc * 2);
387 } else {
388 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
389 machInst, vd * 2, ufp0, inc * 2, lane);
390 }
391 break;
392 case 2:
393 if (all) {
394 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
395 machInst, vd * 2, ufp0, inc * 2);
396 } else {
397 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
398 machInst, vd * 2, ufp0, inc * 2, lane);
399 }
400 break;
401 default:
396 panic("Bad size %d.\n", size);
402 // Bad size
403 microOps[uopIdx++] = new Unknown(machInst);
397 break;
398 }
399 break;
400 case 2:
401 assert(regs == 2);
402 assert(loadRegs <= 2);
403 switch (size) {
404 case 0:
405 if (all) {
406 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
407 machInst, vd * 2, ufp0, inc * 2);
408 } else {
409 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
410 machInst, vd * 2, ufp0, inc * 2, lane);
411 }
412 break;
413 case 1:
414 if (all) {
415 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
416 machInst, vd * 2, ufp0, inc * 2);
417 } else {
418 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
419 machInst, vd * 2, ufp0, inc * 2, lane);
420 }
421 break;
422 case 2:
423 if (all) {
424 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
425 machInst, vd * 2, ufp0, inc * 2);
426 } else {
427 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
428 machInst, vd * 2, ufp0, inc * 2, lane);
429 }
430 break;
431 default:
404 break;
405 }
406 break;
407 case 2:
408 assert(regs == 2);
409 assert(loadRegs <= 2);
410 switch (size) {
411 case 0:
412 if (all) {
413 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
414 machInst, vd * 2, ufp0, inc * 2);
415 } else {
416 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
417 machInst, vd * 2, ufp0, inc * 2, lane);
418 }
419 break;
420 case 1:
421 if (all) {
422 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
423 machInst, vd * 2, ufp0, inc * 2);
424 } else {
425 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
426 machInst, vd * 2, ufp0, inc * 2, lane);
427 }
428 break;
429 case 2:
430 if (all) {
431 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
432 machInst, vd * 2, ufp0, inc * 2);
433 } else {
434 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
435 machInst, vd * 2, ufp0, inc * 2, lane);
436 }
437 break;
438 default:
432 panic("Bad size %d.\n", size);
439 // Bad size
440 microOps[uopIdx++] = new Unknown(machInst);
433 break;
434 }
435 break;
436 case 1:
437 assert(regs == 1 || (all && regs == 2));
438 assert(loadRegs <= 2);
439 for (unsigned offset = 0; offset < regs; offset++) {
440 switch (size) {
441 case 0:
442 if (all) {
443 microOps[uopIdx++] =
444 new MicroUnpackAllNeon2to2Uop<uint8_t>(
445 machInst, (vd + offset) * 2, ufp0, inc * 2);
446 } else {
447 microOps[uopIdx++] =
448 new MicroUnpackNeon2to2Uop<uint8_t>(
449 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
450 }
451 break;
452 case 1:
453 if (all) {
454 microOps[uopIdx++] =
455 new MicroUnpackAllNeon2to2Uop<uint16_t>(
456 machInst, (vd + offset) * 2, ufp0, inc * 2);
457 } else {
458 microOps[uopIdx++] =
459 new MicroUnpackNeon2to2Uop<uint16_t>(
460 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
461 }
462 break;
463 case 2:
464 if (all) {
465 microOps[uopIdx++] =
466 new MicroUnpackAllNeon2to2Uop<uint32_t>(
467 machInst, (vd + offset) * 2, ufp0, inc * 2);
468 } else {
469 microOps[uopIdx++] =
470 new MicroUnpackNeon2to2Uop<uint32_t>(
471 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
472 }
473 break;
474 default:
441 break;
442 }
443 break;
444 case 1:
445 assert(regs == 1 || (all && regs == 2));
446 assert(loadRegs <= 2);
447 for (unsigned offset = 0; offset < regs; offset++) {
448 switch (size) {
449 case 0:
450 if (all) {
451 microOps[uopIdx++] =
452 new MicroUnpackAllNeon2to2Uop<uint8_t>(
453 machInst, (vd + offset) * 2, ufp0, inc * 2);
454 } else {
455 microOps[uopIdx++] =
456 new MicroUnpackNeon2to2Uop<uint8_t>(
457 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
458 }
459 break;
460 case 1:
461 if (all) {
462 microOps[uopIdx++] =
463 new MicroUnpackAllNeon2to2Uop<uint16_t>(
464 machInst, (vd + offset) * 2, ufp0, inc * 2);
465 } else {
466 microOps[uopIdx++] =
467 new MicroUnpackNeon2to2Uop<uint16_t>(
468 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
469 }
470 break;
471 case 2:
472 if (all) {
473 microOps[uopIdx++] =
474 new MicroUnpackAllNeon2to2Uop<uint32_t>(
475 machInst, (vd + offset) * 2, ufp0, inc * 2);
476 } else {
477 microOps[uopIdx++] =
478 new MicroUnpackNeon2to2Uop<uint32_t>(
479 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
480 }
481 break;
482 default:
475 panic("Bad size %d.\n", size);
483 // Bad size
484 microOps[uopIdx++] = new Unknown(machInst);
476 break;
477 }
478 }
479 break;
480 default:
485 break;
486 }
487 }
488 break;
489 default:
481 panic("Bad number of elements to unpack %d.\n", elems);
490 // Bad number of elements to unpack
491 microOps[uopIdx++] = new Unknown(machInst);
482 }
483 assert(uopIdx == numMicroops);
484
485 for (unsigned i = 0; i < numMicroops - 1; i++) {
486 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
487 assert(uopPtr);
488 uopPtr->setDelayedCommit();
489 }
490 microOps[numMicroops - 1]->setLastMicroop();
491}
492
493VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
494 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
495 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
496 PredMacroOp(mnem, machInst, __opClass)
497{
498 assert(regs > 0 && regs <= 4);
499 assert(regs % elems == 0);
500
501 numMicroops = (regs > 2) ? 2 : 1;
502 bool wb = (rm != 15);
503 bool interleave = (elems > 1);
504
505 if (wb) numMicroops++;
506 if (interleave) numMicroops += (regs / elems);
507 microOps = new StaticInstPtr[numMicroops];
508
509 uint32_t noAlign = TLB::MustBeOne;
510
511 RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;
512
513 unsigned uopIdx = 0;
514 if (interleave) {
515 switch (elems) {
516 case 4:
517 assert(regs == 4);
518 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
519 size, machInst, rMid, vd * 2, inc * 2);
520 break;
521 case 3:
522 assert(regs == 3);
523 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
524 size, machInst, rMid, vd * 2, inc * 2);
525 break;
526 case 2:
527 assert(regs == 4 || regs == 2);
528 if (regs == 4) {
529 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
530 size, machInst, rMid, vd * 2, inc * 2);
531 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
532 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
533 } else {
534 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
535 size, machInst, rMid, vd * 2, inc * 2);
536 }
537 break;
538 default:
492 }
493 assert(uopIdx == numMicroops);
494
495 for (unsigned i = 0; i < numMicroops - 1; i++) {
496 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
497 assert(uopPtr);
498 uopPtr->setDelayedCommit();
499 }
500 microOps[numMicroops - 1]->setLastMicroop();
501}
502
503VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
504 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
505 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
506 PredMacroOp(mnem, machInst, __opClass)
507{
508 assert(regs > 0 && regs <= 4);
509 assert(regs % elems == 0);
510
511 numMicroops = (regs > 2) ? 2 : 1;
512 bool wb = (rm != 15);
513 bool interleave = (elems > 1);
514
515 if (wb) numMicroops++;
516 if (interleave) numMicroops += (regs / elems);
517 microOps = new StaticInstPtr[numMicroops];
518
519 uint32_t noAlign = TLB::MustBeOne;
520
521 RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;
522
523 unsigned uopIdx = 0;
524 if (interleave) {
525 switch (elems) {
526 case 4:
527 assert(regs == 4);
528 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
529 size, machInst, rMid, vd * 2, inc * 2);
530 break;
531 case 3:
532 assert(regs == 3);
533 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
534 size, machInst, rMid, vd * 2, inc * 2);
535 break;
536 case 2:
537 assert(regs == 4 || regs == 2);
538 if (regs == 4) {
539 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
540 size, machInst, rMid, vd * 2, inc * 2);
541 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
542 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
543 } else {
544 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
545 size, machInst, rMid, vd * 2, inc * 2);
546 }
547 break;
548 default:
539 panic("Bad number of elements to interleave %d.\n", elems);
549 // Bad number of elements to interleave
550 microOps[uopIdx++] = new Unknown(machInst);
540 }
541 }
542 switch (regs) {
543 case 4:
544 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
545 size, machInst, rMid, rn, 0, align);
546 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
547 size, machInst, rMid + 4, rn, 16, noAlign);
548 break;
549 case 3:
550 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
551 size, machInst, rMid, rn, 0, align);
552 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
553 size, machInst, rMid + 4, rn, 16, noAlign);
554 break;
555 case 2:
556 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
557 size, machInst, rMid, rn, 0, align);
558 break;
559 case 1:
560 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
561 size, machInst, rMid, rn, 0, align);
562 break;
563 default:
551 }
552 }
553 switch (regs) {
554 case 4:
555 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
556 size, machInst, rMid, rn, 0, align);
557 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
558 size, machInst, rMid + 4, rn, 16, noAlign);
559 break;
560 case 3:
561 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
562 size, machInst, rMid, rn, 0, align);
563 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
564 size, machInst, rMid + 4, rn, 16, noAlign);
565 break;
566 case 2:
567 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
568 size, machInst, rMid, rn, 0, align);
569 break;
570 case 1:
571 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
572 size, machInst, rMid, rn, 0, align);
573 break;
574 default:
564 panic("Unrecognized number of registers %d.\n", regs);
575 // Unknown number of registers
576 microOps[uopIdx++] = new Unknown(machInst);
565 }
566 if (wb) {
567 if (rm != 15 && rm != 13) {
568 microOps[uopIdx++] =
569 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
570 } else {
571 microOps[uopIdx++] =
572 new MicroAddiUop(machInst, rn, rn, regs * 8);
573 }
574 }
575 assert(uopIdx == numMicroops);
576
577 for (unsigned i = 0; i < numMicroops - 1; i++) {
578 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
579 assert(uopPtr);
580 uopPtr->setDelayedCommit();
581 }
582 microOps[numMicroops - 1]->setLastMicroop();
583}
584
585VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
586 OpClass __opClass, bool all, unsigned elems,
587 RegIndex rn, RegIndex vd, unsigned regs,
588 unsigned inc, uint32_t size, uint32_t align,
589 RegIndex rm, unsigned lane) :
590 PredMacroOp(mnem, machInst, __opClass)
591{
592 assert(!all);
593 assert(regs > 0 && regs <= 4);
594 assert(regs % elems == 0);
595
596 unsigned eBytes = (1 << size);
597 unsigned storeSize = eBytes * elems;
598 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
599 sizeof(FloatRegBits);
600
601 assert(storeRegs > 0 && storeRegs <= 4);
602
603 numMicroops = 1;
604 bool wb = (rm != 15);
605
606 if (wb) numMicroops++;
607 numMicroops += (regs / elems);
608 microOps = new StaticInstPtr[numMicroops];
609
610 RegIndex ufp0 = NumFloatArchRegs;
611
612 unsigned uopIdx = 0;
613 switch (elems) {
614 case 4:
615 assert(regs == 4);
616 switch (size) {
617 case 0:
618 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
619 machInst, ufp0, vd * 2, inc * 2, lane);
620 break;
621 case 1:
622 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
623 machInst, ufp0, vd * 2, inc * 2, lane);
624 break;
625 case 2:
626 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
627 machInst, ufp0, vd * 2, inc * 2, lane);
628 break;
629 default:
577 }
578 if (wb) {
579 if (rm != 15 && rm != 13) {
580 microOps[uopIdx++] =
581 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
582 } else {
583 microOps[uopIdx++] =
584 new MicroAddiUop(machInst, rn, rn, regs * 8);
585 }
586 }
587 assert(uopIdx == numMicroops);
588
589 for (unsigned i = 0; i < numMicroops - 1; i++) {
590 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
591 assert(uopPtr);
592 uopPtr->setDelayedCommit();
593 }
594 microOps[numMicroops - 1]->setLastMicroop();
595}
596
597VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
598 OpClass __opClass, bool all, unsigned elems,
599 RegIndex rn, RegIndex vd, unsigned regs,
600 unsigned inc, uint32_t size, uint32_t align,
601 RegIndex rm, unsigned lane) :
602 PredMacroOp(mnem, machInst, __opClass)
603{
604 assert(!all);
605 assert(regs > 0 && regs <= 4);
606 assert(regs % elems == 0);
607
608 unsigned eBytes = (1 << size);
609 unsigned storeSize = eBytes * elems;
610 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
611 sizeof(FloatRegBits);
612
613 assert(storeRegs > 0 && storeRegs <= 4);
614
615 numMicroops = 1;
616 bool wb = (rm != 15);
617
618 if (wb) numMicroops++;
619 numMicroops += (regs / elems);
620 microOps = new StaticInstPtr[numMicroops];
621
622 RegIndex ufp0 = NumFloatArchRegs;
623
624 unsigned uopIdx = 0;
625 switch (elems) {
626 case 4:
627 assert(regs == 4);
628 switch (size) {
629 case 0:
630 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
631 machInst, ufp0, vd * 2, inc * 2, lane);
632 break;
633 case 1:
634 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
635 machInst, ufp0, vd * 2, inc * 2, lane);
636 break;
637 case 2:
638 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
639 machInst, ufp0, vd * 2, inc * 2, lane);
640 break;
641 default:
630 panic("Bad size %d.\n", size);
642 // Bad size
643 microOps[uopIdx++] = new Unknown(machInst);
631 break;
632 }
633 break;
634 case 3:
635 assert(regs == 3);
636 switch (size) {
637 case 0:
638 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
639 machInst, ufp0, vd * 2, inc * 2, lane);
640 break;
641 case 1:
642 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
643 machInst, ufp0, vd * 2, inc * 2, lane);
644 break;
645 case 2:
646 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
647 machInst, ufp0, vd * 2, inc * 2, lane);
648 break;
649 default:
644 break;
645 }
646 break;
647 case 3:
648 assert(regs == 3);
649 switch (size) {
650 case 0:
651 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
652 machInst, ufp0, vd * 2, inc * 2, lane);
653 break;
654 case 1:
655 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
656 machInst, ufp0, vd * 2, inc * 2, lane);
657 break;
658 case 2:
659 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
660 machInst, ufp0, vd * 2, inc * 2, lane);
661 break;
662 default:
650 panic("Bad size %d.\n", size);
663 // Bad size
664 microOps[uopIdx++] = new Unknown(machInst);
651 break;
652 }
653 break;
654 case 2:
655 assert(regs == 2);
656 assert(storeRegs <= 2);
657 switch (size) {
658 case 0:
659 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
660 machInst, ufp0, vd * 2, inc * 2, lane);
661 break;
662 case 1:
663 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
664 machInst, ufp0, vd * 2, inc * 2, lane);
665 break;
666 case 2:
667 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
668 machInst, ufp0, vd * 2, inc * 2, lane);
669 break;
670 default:
665 break;
666 }
667 break;
668 case 2:
669 assert(regs == 2);
670 assert(storeRegs <= 2);
671 switch (size) {
672 case 0:
673 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
674 machInst, ufp0, vd * 2, inc * 2, lane);
675 break;
676 case 1:
677 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
678 machInst, ufp0, vd * 2, inc * 2, lane);
679 break;
680 case 2:
681 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
682 machInst, ufp0, vd * 2, inc * 2, lane);
683 break;
684 default:
671 panic("Bad size %d.\n", size);
685 // Bad size
686 microOps[uopIdx++] = new Unknown(machInst);
672 break;
673 }
674 break;
675 case 1:
676 assert(regs == 1 || (all && regs == 2));
677 assert(storeRegs <= 2);
678 for (unsigned offset = 0; offset < regs; offset++) {
679 switch (size) {
680 case 0:
681 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
682 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
683 break;
684 case 1:
685 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
686 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
687 break;
688 case 2:
689 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
690 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
691 break;
692 default:
687 break;
688 }
689 break;
690 case 1:
691 assert(regs == 1 || (all && regs == 2));
692 assert(storeRegs <= 2);
693 for (unsigned offset = 0; offset < regs; offset++) {
694 switch (size) {
695 case 0:
696 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
697 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
698 break;
699 case 1:
700 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
701 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
702 break;
703 case 2:
704 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
705 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
706 break;
707 default:
693 panic("Bad size %d.\n", size);
708 // Bad size
709 microOps[uopIdx++] = new Unknown(machInst);
694 break;
695 }
696 }
697 break;
698 default:
710 break;
711 }
712 }
713 break;
714 default:
699 panic("Bad number of elements to pack %d.\n", elems);
715 // Bad number of elements to unpack
716 microOps[uopIdx++] = new Unknown(machInst);
700 }
701 switch (storeSize) {
702 case 1:
703 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
704 machInst, ufp0, rn, 0, align);
705 break;
706 case 2:
707 if (eBytes == 2) {
708 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
709 machInst, ufp0, rn, 0, align);
710 } else {
711 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
712 machInst, ufp0, rn, 0, align);
713 }
714 break;
715 case 3:
716 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
717 machInst, ufp0, rn, 0, align);
718 break;
719 case 4:
720 switch (eBytes) {
721 case 1:
722 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
723 machInst, ufp0, rn, 0, align);
724 break;
725 case 2:
726 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
727 machInst, ufp0, rn, 0, align);
728 break;
729 case 4:
730 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
731 machInst, ufp0, rn, 0, align);
732 break;
733 }
734 break;
735 case 6:
736 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
737 machInst, ufp0, rn, 0, align);
738 break;
739 case 8:
740 switch (eBytes) {
741 case 2:
742 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
743 machInst, ufp0, rn, 0, align);
744 break;
745 case 4:
746 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
747 machInst, ufp0, rn, 0, align);
748 break;
749 }
750 break;
751 case 12:
752 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
753 machInst, ufp0, rn, 0, align);
754 break;
755 case 16:
756 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
757 machInst, ufp0, rn, 0, align);
758 break;
759 default:
717 }
718 switch (storeSize) {
719 case 1:
720 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
721 machInst, ufp0, rn, 0, align);
722 break;
723 case 2:
724 if (eBytes == 2) {
725 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
726 machInst, ufp0, rn, 0, align);
727 } else {
728 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
729 machInst, ufp0, rn, 0, align);
730 }
731 break;
732 case 3:
733 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
734 machInst, ufp0, rn, 0, align);
735 break;
736 case 4:
737 switch (eBytes) {
738 case 1:
739 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
740 machInst, ufp0, rn, 0, align);
741 break;
742 case 2:
743 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
744 machInst, ufp0, rn, 0, align);
745 break;
746 case 4:
747 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
748 machInst, ufp0, rn, 0, align);
749 break;
750 }
751 break;
752 case 6:
753 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
754 machInst, ufp0, rn, 0, align);
755 break;
756 case 8:
757 switch (eBytes) {
758 case 2:
759 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
760 machInst, ufp0, rn, 0, align);
761 break;
762 case 4:
763 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
764 machInst, ufp0, rn, 0, align);
765 break;
766 }
767 break;
768 case 12:
769 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
770 machInst, ufp0, rn, 0, align);
771 break;
772 case 16:
773 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
774 machInst, ufp0, rn, 0, align);
775 break;
776 default:
760 panic("Unrecognized store size %d.\n", regs);
777 // Bad store size
778 microOps[uopIdx++] = new Unknown(machInst);
761 }
762 if (wb) {
763 if (rm != 15 && rm != 13) {
764 microOps[uopIdx++] =
765 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
766 } else {
767 microOps[uopIdx++] =
768 new MicroAddiUop(machInst, rn, rn, storeSize);
769 }
770 }
771 assert(uopIdx == numMicroops);
772
773 for (unsigned i = 0; i < numMicroops - 1; i++) {
774 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
775 assert(uopPtr);
776 uopPtr->setDelayedCommit();
777 }
778 microOps[numMicroops - 1]->setLastMicroop();
779}
780
// Builds the micro-op sequence for a VFP load/store multiple macro-op.
//
// Each loop iteration emits one FP memory micro-op when `single` is set, or a
// DB/DT pair of micro-ops otherwise. When writeback is requested (or forced),
// a final micro-op adds or subtracts 4 * offset on the base register.
//
// Parameters, as they are used below:
//   rn        - base register handed to every memory micro-op and updated by
//               the writeback micro-op.
//   vd        - first FP register index; post-incremented once per memory
//               micro-op created.
//   single    - true: one micro-op per iteration; false: two (DB, then DT).
//   up        - true: displacements start at 0 and grow; false: they start
//               at 4 * offset and count down.
//   writeback - append MicroAddiUop (up) or MicroSubiUop (!up) of 4 * offset
//               on rn. Forced to true when the iteration count is 0.
//   load      - true selects the MicroLdr*FpUop classes, false the
//               MicroStr*FpUop classes.
//   offset    - length field; the iteration count is offset when `single`,
//               offset / 2 otherwise, clamped to NumFloatArchRegs.
781MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
782 OpClass __opClass, IntRegIndex rn,
783 RegIndex vd, bool single, bool up,
784 bool writeback, bool load, uint32_t offset) :
785 PredMacroOp(mnem, machInst, __opClass)
786{
    // Index of the next free slot in microOps.
787 int i = 0;
788
789 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
790 // to be functionally identical except that fldmx is deprecated. For now
791 // we'll assume they're otherwise interchangeable.
792 int count = (single ? offset : (offset / 2));
    // Out-of-range counts only produce a one-time warning; they are then
    // patched up below rather than rejected.
793 if (count == 0 || count > NumFloatArchRegs)
794 warn_once("Bad offset field for VFP load/store multiple.\n");
795 if (count == 0) {
796 // Force there to be at least one microop so the macroop makes sense.
797 writeback = true;
798 }
799 if (count > NumFloatArchRegs)
800 count = NumFloatArchRegs;
801
    // One micro-op per iteration for singles, two otherwise, plus the
    // optional writeback micro-op.
802 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
803 microOps = new StaticInstPtr[numMicroops];
804
    // Displacement passed to the memory micro-ops. It starts at 0 when
    // counting up and at 4 * offset when counting down.
805 int64_t addr = 0;
806
807 if (!up)
808 addr = 4 * offset;
809
    // Direction passed to the micro-ops; may flip to true inside the loop
    // even when `up` is false (see the addr <= 0 check below).
810 bool tempUp = up;
811 for (int j = 0; j < count; j++) {
812 if (load) {
813 if (single) {
814 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
815 tempUp, addr);
816 } else {
817 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
818 tempUp, addr);
    // The second micro-op of the pair uses a displacement 4 away from the
    // first. NOTE(review): the sign is picked from `up`, not `tempUp`.
819 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
820 addr + (up ? 4 : -4));
821 }
822 } else {
823 if (single) {
824 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
825 tempUp, addr);
826 } else {
827 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
828 tempUp, addr);
    // Same pairing as the load case above.
829 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
830 addr + (up ? 4 : -4));
831 }
832 }
    // Advance the displacement by 4 (single) or 8 (pair) in the current
    // direction.
833 if (!tempUp) {
834 addr -= (single ? 4 : 8);
835 // The microops don't handle negative displacement, so turn if we
836 // hit zero, flip polarity and start adding.
837 if (addr <= 0) {
838 tempUp = true;
839 addr = -addr;
840 }
841 } else {
842 addr += (single ? 4 : 8);
843 }
844 }
845
    // Base register update: rn + 4 * offset when up, rn - 4 * offset
    // otherwise.
846 if (writeback) {
847 if (up) {
848 microOps[i++] =
849 new MicroAddiUop(machInst, rn, rn, 4 * offset);
850 } else {
851 microOps[i++] =
852 new MicroSubiUop(machInst, rn, rn, 4 * offset);
853 }
854 }
855
    // Every slot allocated above must have been filled.
856 assert(numMicroops == i);
857 microOps[numMicroops - 1]->setLastMicroop();
858
    // Walk from the first micro-op up to, but not including, the one just
    // flagged as last, and mark each as delayed-commit. The dynamic_cast plus
    // assert checks that every such entry is a MicroOp.
859 for (StaticInstPtr *curUop = microOps;
860 !(*curUop)->isLastMicroop(); curUop++) {
861 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
862 assert(uopPtr);
863 uopPtr->setDelayedCommit();
864 }
865}
866
// Returns the textual form of this micro-op: the output of printMnemonic,
// then registers ura and urb, then the immediate written as "#<imm>", with
// ", " between the operands. The pc and symtab arguments are not used here.
867std::string
868MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
869{
870 std::stringstream ss;
871 printMnemonic(ss);
872 printReg(ss, ura);
873 ss << ", ";
874 printReg(ss, urb);
875 ss << ", ";
    // The immediate is formatted with "%d".
876 ccprintf(ss, "#%d", imm);
877 return ss.str();
878}
879
// Returns the textual form of this micro-op: the output of printMnemonic
// followed by registers ura and urb separated by ", ". The pc and symtab
// arguments are not used here.
880std::string
881MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
882{
883 std::stringstream ss;
884 printMnemonic(ss);
885 printReg(ss, ura);
886 ss << ", ";
887 printReg(ss, urb);
888 return ss.str();
889}
890
// Returns the textual form of this micro-op: the output of printMnemonic
// followed by registers ura, urb and urc, each separated by ", ". The pc and
// symtab arguments are not used here.
891std::string
892MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
893{
894 std::stringstream ss;
895 printMnemonic(ss);
896 printReg(ss, ura);
897 ss << ", ";
898 printReg(ss, urb);
899 ss << ", ";
900 printReg(ss, urc);
901 return ss.str();
902}
903
// Returns the textual form of this micro-op in the shape
// "<mnemonic><ura>, [<urb>, #<imm>]": the output of printMnemonic, register
// ura, then a bracketed operand holding register urb and the immediate. The
// pc and symtab arguments are not used here.
904std::string
905MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
906{
907 std::stringstream ss;
908 printMnemonic(ss);
909 printReg(ss, ura);
    // Open the bracketed operand.
910 ss << ", [";
911 printReg(ss, urb);
912 ss << ", ";
    // The immediate is formatted with "%d".
913 ccprintf(ss, "#%d", imm);
914 ss << "]";
915 return ss.str();
916}
917
918}
779 }
780 if (wb) {
781 if (rm != 15 && rm != 13) {
782 microOps[uopIdx++] =
783 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
784 } else {
785 microOps[uopIdx++] =
786 new MicroAddiUop(machInst, rn, rn, storeSize);
787 }
788 }
789 assert(uopIdx == numMicroops);
790
791 for (unsigned i = 0; i < numMicroops - 1; i++) {
792 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
793 assert(uopPtr);
794 uopPtr->setDelayedCommit();
795 }
796 microOps[numMicroops - 1]->setLastMicroop();
797}
798
// Builds the micro-op sequence for a VFP load/store multiple macro-op.
//
// Each loop iteration emits one FP memory micro-op when `single` is set, or a
// DB/DT pair of micro-ops otherwise. When writeback is requested (or forced),
// a final micro-op adds or subtracts 4 * offset on the base register.
//
// Parameters, as they are used below:
//   rn        - base register handed to every memory micro-op and updated by
//               the writeback micro-op.
//   vd        - first FP register index; post-incremented once per memory
//               micro-op created.
//   single    - true: one micro-op per iteration; false: two (DB, then DT).
//   up        - true: displacements start at 0 and grow; false: they start
//               at 4 * offset and count down.
//   writeback - append MicroAddiUop (up) or MicroSubiUop (!up) of 4 * offset
//               on rn. Forced to true when the iteration count is 0.
//   load      - true selects the MicroLdr*FpUop classes, false the
//               MicroStr*FpUop classes.
//   offset    - length field; the iteration count is offset when `single`,
//               offset / 2 otherwise, clamped to NumFloatArchRegs.
799MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
800 OpClass __opClass, IntRegIndex rn,
801 RegIndex vd, bool single, bool up,
802 bool writeback, bool load, uint32_t offset) :
803 PredMacroOp(mnem, machInst, __opClass)
804{
    // Index of the next free slot in microOps.
805 int i = 0;
806
807 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
808 // to be functionally identical except that fldmx is deprecated. For now
809 // we'll assume they're otherwise interchangeable.
810 int count = (single ? offset : (offset / 2));
    // Out-of-range counts only produce a one-time warning; they are then
    // patched up below rather than rejected.
811 if (count == 0 || count > NumFloatArchRegs)
812 warn_once("Bad offset field for VFP load/store multiple.\n");
813 if (count == 0) {
814 // Force there to be at least one microop so the macroop makes sense.
815 writeback = true;
816 }
817 if (count > NumFloatArchRegs)
818 count = NumFloatArchRegs;
819
    // One micro-op per iteration for singles, two otherwise, plus the
    // optional writeback micro-op.
820 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
821 microOps = new StaticInstPtr[numMicroops];
822
    // Displacement passed to the memory micro-ops. It starts at 0 when
    // counting up and at 4 * offset when counting down.
823 int64_t addr = 0;
824
825 if (!up)
826 addr = 4 * offset;
827
    // Direction passed to the micro-ops; may flip to true inside the loop
    // even when `up` is false (see the addr <= 0 check below).
828 bool tempUp = up;
829 for (int j = 0; j < count; j++) {
830 if (load) {
831 if (single) {
832 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
833 tempUp, addr);
834 } else {
835 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
836 tempUp, addr);
    // The second micro-op of the pair uses a displacement 4 away from the
    // first. NOTE(review): the sign is picked from `up`, not `tempUp`.
837 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
838 addr + (up ? 4 : -4));
839 }
840 } else {
841 if (single) {
842 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
843 tempUp, addr);
844 } else {
845 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
846 tempUp, addr);
    // Same pairing as the load case above.
847 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
848 addr + (up ? 4 : -4));
849 }
850 }
    // Advance the displacement by 4 (single) or 8 (pair) in the current
    // direction.
851 if (!tempUp) {
852 addr -= (single ? 4 : 8);
853 // The microops don't handle negative displacement, so turn if we
854 // hit zero, flip polarity and start adding.
855 if (addr <= 0) {
856 tempUp = true;
857 addr = -addr;
858 }
859 } else {
860 addr += (single ? 4 : 8);
861 }
862 }
863
    // Base register update: rn + 4 * offset when up, rn - 4 * offset
    // otherwise.
864 if (writeback) {
865 if (up) {
866 microOps[i++] =
867 new MicroAddiUop(machInst, rn, rn, 4 * offset);
868 } else {
869 microOps[i++] =
870 new MicroSubiUop(machInst, rn, rn, 4 * offset);
871 }
872 }
873
    // Every slot allocated above must have been filled.
874 assert(numMicroops == i);
875 microOps[numMicroops - 1]->setLastMicroop();
876
    // Walk from the first micro-op up to, but not including, the one just
    // flagged as last, and mark each as delayed-commit. The dynamic_cast plus
    // assert checks that every such entry is a MicroOp.
877 for (StaticInstPtr *curUop = microOps;
878 !(*curUop)->isLastMicroop(); curUop++) {
879 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
880 assert(uopPtr);
881 uopPtr->setDelayedCommit();
882 }
883}
884
// Returns the textual form of this micro-op: the output of printMnemonic,
// then registers ura and urb, then the immediate written as "#<imm>", with
// ", " between the operands. The pc and symtab arguments are not used here.
885std::string
886MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
887{
888 std::stringstream ss;
889 printMnemonic(ss);
890 printReg(ss, ura);
891 ss << ", ";
892 printReg(ss, urb);
893 ss << ", ";
    // The immediate is formatted with "%d".
894 ccprintf(ss, "#%d", imm);
895 return ss.str();
896}
897
// Returns the textual form of this micro-op: the output of printMnemonic
// followed by registers ura and urb separated by ", ". The pc and symtab
// arguments are not used here.
898std::string
899MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
900{
901 std::stringstream ss;
902 printMnemonic(ss);
903 printReg(ss, ura);
904 ss << ", ";
905 printReg(ss, urb);
906 return ss.str();
907}
908
// Returns the textual form of this micro-op: the output of printMnemonic
// followed by registers ura, urb and urc, each separated by ", ". The pc and
// symtab arguments are not used here.
909std::string
910MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
911{
912 std::stringstream ss;
913 printMnemonic(ss);
914 printReg(ss, ura);
915 ss << ", ";
916 printReg(ss, urb);
917 ss << ", ";
918 printReg(ss, urc);
919 return ss.str();
920}
921
// Returns the textual form of this micro-op in the shape
// "<mnemonic><ura>, [<urb>, #<imm>]": the output of printMnemonic, register
// ura, then a bracketed operand holding register urb and the immediate. The
// pc and symtab arguments are not used here.
922std::string
923MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
924{
925 std::stringstream ss;
926 printMnemonic(ss);
927 printReg(ss, ura);
    // Open the bracketed operand.
928 ss << ", [";
929 printReg(ss, urb);
930 ss << ", ";
    // The immediate is formatted with "%d".
931 ccprintf(ss, "#%d", imm);
932 ss << "]";
933 return ss.str();
934}
935
936}