}

// NEON VLDn (multiple structures) macro-op. Decomposes the load into:
//   1. one or two bulk load microops (16-byte loads, plus an 8-byte load
//      when an odd register is left over),
//   2. an optional base-register writeback microop (rm == 15 encodes "no
//      writeback"; rm == 13 selects an immediate post-increment of
//      regs * 8 bytes; any other rm adds that register), and
//   3. deinterleave microops when elems > 1, which move data from the
//      temporary microarchitectural registers at NumFloatV7ArchRegs into
//      the architectural destination vd.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, load into scratch registers first; otherwise load
    // straight into the destination.
    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    // Only the first memory microop carries the architectural alignment
    // check; the second one must not re-check it.
    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    // All but the last microop commit together with the macro-op.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VLDn (single structure to one lane, or VLDn "all lanes" when
// all == true) macro-op: one load microop sized by eBytes * elems into a
// scratch register, optional writeback, then unpack microops that insert
// the loaded element(s) into lane `lane` (or replicate across all lanes)
// of the destination register(s).
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    // loadRegs is only consulted by the asserts below.
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register the memory microop loads into before unpacking.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // VLD1 "to all lanes" may target two consecutive registers.
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (multiple structures) macro-op: the mirror image of VldMultOp.
// Interleave microops (when elems > 1) run first to gather the data into
// scratch registers, then the store microops write it out, then the
// optional writeback updates rn.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // Store from scratch registers when interleaving, otherwise straight
    // from the source registers.
    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (single structure from one lane) macro-op: pack microops
// extract lane `lane` from the source register(s) into a scratch register,
// then one store microop writes it out, then the optional writeback runs.
// Note `all` is asserted false here — "store all lanes" does not exist.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    // storeRegs is only consulted by the asserts below.
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // NOTE(review): the (all && regs == 2) arm is unreachable given
        // assert(!all) above; kept as-is to mirror VldSingleOp.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 LD1/LD2/LD3/LD4 (multiple structures) macro-op: bulk loads into
// scratch registers (at most 16 bytes per memory microop), optional
// writeback, then deinterleave ("marshal") microops into vd.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshal microop handles two registers.
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }

    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 ST1/ST2/ST3/ST4 (multiple structures) macro-op: interleave
// ("marshal") microops gather vd into scratch registers first, then bulk
// store microops (max 16 bytes each) write out, then optional writeback.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 LDn (single structure) macro-op: load microops into scratch
// registers, optional writeback, then unpack microops insert the element
// at `index` (or replicate it when `replicate` is set) into vd.
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 STn (single structure) macro-op: pack microops extract the
// element at `index` from vd into scratch registers, then store microops
// write it out, then the optional writeback runs.
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // Final memory microop carries the leftover (< 16 byte) access size.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// VFP VLDM/VSTM (and VPUSH/VPOP-style) macro-op: one (single-precision) or
// two (double-precision halves) load/store microops per register, plus an
// optional add/subtract writeback microop on the base register.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    // For a descending transfer, start at the top and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so when the
            // running address reaches zero, flip polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

// Disassembly: "<mnem> ura, urb, #imm".
std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, #imm" (64-bit immediate variant).
std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassembly: "<mnem> [PC,CPSR]".
std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, urc <extend> #shiftAmt".
std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ccprintf(ss, ", ");
    printReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb".
std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    return ss.str();
}

// Disassembly: "<mnem> ura, urb, urc".
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    printReg(ss, urc);
    return ss.str();
}

// Disassembly: "<mnem> ura, [urb, #imm]"; floating microops print ura in
// the FP register namespace.
std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printReg(ss, ura + FP_Reg_Base);
    else
        printReg(ss, ura);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}

// Expand a NEON VLDn (multiple n-element structures) macro-op: one or two
// wide load micro-ops, an optional base-register writeback, then (when the
// structures are interleaved in memory) deinterleave micro-ops that scatter
// the elements into the destination registers.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 32 bytes are loaded: one 16-byte micro-op, plus a second for
    // more than two registers.
    numMicroops = (regs > 2) ? 2 : 1;
    // Per the ARM encoding, Rm == 15 (PC) means no writeback.
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving is needed, load into scratch registers just past
    // the architectural v7 FP file; otherwise load straight into vd.
    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment constraint.
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two pair-deinterleaves cover the four registers.
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VLDn (single structure, to one lane or all lanes): load the
// structure into a scratch register, optionally write back the base, then
// unpack the elements into the destination lane(s).
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    // Number of 32-bit scratch registers the loaded bytes occupy (only used
    // by the asserts below).
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register just past the architectural v7 FP file.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    // Pick a load micro-op sized for the whole structure; the element type
    // parameter tracks eBytes.
    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the loaded structure into the destination lane(s); "all"
    // selects the replicate-to-all-lanes micro-ops.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VSTn (multiple n-element structures) macro-op: mirror image
// of VldMultOp — interleave micro-ops first (when needed), then one or two
// wide store micro-ops, then an optional base-register writeback.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // Interleave into scratch registers before storing; otherwise store
    // straight from vd.
    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a NEON VSTn (single structure from one lane): pack the selected
// lane's elements into a scratch register, store the structure, then
// optionally write back the base register.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Store-to-all-lanes does not exist; "all" is only meaningful for loads.
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    // Number of 32-bit scratch registers the stored bytes occupy (only used
    // by the asserts below).
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    // Rm == 15 (PC) encodes no writeback.
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch register just past the architectural v7 FP file.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Store micro-op sized for the whole packed structure.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Post-index by register Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // Rm == 13 encodes post-increment by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 LD1/LD2/LD3/LD4 (multiple structures) macro-op: wide
// loads into scratch registers, optional writeback, then deinterleave
// ("marshal") micro-ops into the destination registers.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshal micro-op per register pair (rounded up).
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ?
1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte loads, then one final load for the remainder (or a full
    // 16 bytes when the total is a multiple of 16).
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Deinterleave the scratch registers into the destination registers.
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch(numRegs) {
        case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
        case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
        case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
        case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
        default: panic("Invalid number of registers");
        }

    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 ST1/ST2/ST3/ST4 (multiple structures) macro-op: mirror
// of VldMultOp64 — interleave ("marshal") micro-ops first, then wide stores,
// then an optional writeback.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Interleave the source registers into the scratch registers.
    for(int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
        case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
        case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
        case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
        case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
        default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte stores, then one final store for the remainder (or a
    // full 16 bytes when the total is a multiple of 16).
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 LD1/LD2/LD3/LD4 (single structure, one lane or
// replicate-to-all) macro-op: loads into scratch registers, optional
// writeback, then unpack micro-ops into the destination lane(s).
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One unpack micro-op per structure-element pair (rounded up).
    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ?
1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte loads, then one final load for the remainder (or a full
    // 16 bytes when the total is a multiple of 16).
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Unpack the loaded elements into the chosen lane (or all lanes when
    // replicate is set).
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand an AArch64 ST1/ST2/ST3/ST4 (single structure from one lane)
// macro-op: pack the lane's elements into scratch registers, store them,
// then optionally write back the base register.
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Scratch registers start a quarter of the way into the v8 FP file.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Pack the selected lane into the scratch registers before storing.
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte stores, then one final store for the remainder (or a
    // full 16 bytes when the total is a multiple of 16).
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Expand a VFP load/store multiple (FLDM*/FSTM*-style) macro-op into one
// transfer micro-op per single-precision register (two word transfers per
// double), followed by an optional base-register writeback micro-op.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    // Descending transfers start at the highest offset and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                // Doubles are moved as bottom/top word pairs.
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ?
4 : -4)); 1461 } 1462 } else { 1463 if (single) { 1464 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, 1465 tempUp, addr); 1466 } else { 1467 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, 1468 tempUp, addr); 1469 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, 1470 addr + (up ? 4 : -4)); 1471 } 1472 } 1473 if (!tempUp) { 1474 addr -= (single ? 4 : 8); 1475 // The microops don't handle negative displacement, so turn if we 1476 // hit zero, flip polarity and start adding. 1477 if (addr <= 0) { 1478 tempUp = true; 1479 addr = -addr; 1480 } 1481 } else { 1482 addr += (single ? 4 : 8); 1483 } 1484 } 1485 1486 if (writeback) { 1487 if (up) { 1488 microOps[i++] = 1489 new MicroAddiUop(machInst, rn, rn, 4 * offset); 1490 } else { 1491 microOps[i++] = 1492 new MicroSubiUop(machInst, rn, rn, 4 * offset); 1493 } 1494 } 1495 1496 assert(numMicroops == i); 1497 microOps[numMicroops - 1]->setLastMicroop(); 1498 1499 for (StaticInstPtr *curUop = microOps; 1500 !(*curUop)->isLastMicroop(); curUop++) { 1501 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 1502 assert(uopPtr); 1503 uopPtr->setDelayedCommit(); 1504 } 1505} 1506 1507std::string 1508MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1509{ 1510 std::stringstream ss; 1511 printMnemonic(ss); 1512 printReg(ss, ura); 1513 ss << ", "; 1514 printReg(ss, urb); 1515 ss << ", "; 1516 ccprintf(ss, "#%d", imm); 1517 return ss.str(); 1518} 1519 1520std::string 1521MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1522{ 1523 std::stringstream ss; 1524 printMnemonic(ss); 1525 printReg(ss, ura); 1526 ss << ", "; 1527 printReg(ss, urb); 1528 ss << ", "; 1529 ccprintf(ss, "#%d", imm); 1530 return ss.str(); 1531} 1532 1533std::string 1534MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1535{ 1536 std::stringstream ss; 1537 printMnemonic(ss); 1538 ss << "[PC,CPSR]"; 1539 return ss.str(); 1540} 1541 1542std::string 
1543MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1544{ 1545 std::stringstream ss; 1546 printMnemonic(ss); 1547 printReg(ss, ura); 1548 ccprintf(ss, ", "); 1549 printReg(ss, urb); 1550 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1551 return ss.str(); 1552} 1553 1554std::string 1555MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1556{ 1557 std::stringstream ss; 1558 printMnemonic(ss); 1559 printReg(ss, ura); 1560 ss << ", "; 1561 printReg(ss, urb); 1562 return ss.str(); 1563} 1564 1565std::string 1566MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1567{ 1568 std::stringstream ss; 1569 printMnemonic(ss); 1570 printReg(ss, ura); 1571 ss << ", "; 1572 printReg(ss, urb); 1573 ss << ", "; 1574 printReg(ss, urc); 1575 return ss.str(); 1576} 1577 1578std::string 1579MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1580{ 1581 std::stringstream ss; 1582 printMnemonic(ss); 1583 if (isFloating()) 1584 printReg(ss, ura + FP_Reg_Base); 1585 else 1586 printReg(ss, ura); 1587 ss << ", ["; 1588 printReg(ss, urb); 1589 ss << ", "; 1590 ccprintf(ss, "#%d", imm); 1591 ss << "]"; 1592 return ss.str(); 1593} 1594
|