/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Stephen Hines
 */

#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"

using namespace std;
using namespace ArmISAInst;

namespace ArmISA
{

MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    uint32_t mem_ops = ones;

    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something)
    bool copy_base = (bits(reglist, rn) && load) || !ones;
    bool force_user = user && !bits(reglist, 15);
    bool exception_ret = user && bits(reglist, 15);
    bool pc_temp = load && writeback && bits(reglist, 15);

    if (!ones) {
        numMicroops = 1;
    } else if (load) {
        numMicroops = ((ones + 1) / 2)
                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
                    + (copy_base ? 1 : 0)
                    + (writeback ? 1 : 0)
                    + (pc_temp ? 1 : 0);
    } else {
        numMicroops = ones + (writeback ? 1 : 0);
    }
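    // Example: a load of {r1, r2, r3} with writeback (and a base register
    // that is not in the list) has ones == 3, so numMicroops is
    // (3 + 1) / 2 + 1 == 3: two paired/single loads plus the writeback.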

    microOps = new StaticInstPtr[numMicroops];

    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    if (!index)
        addr += 4;
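    // At this point addr holds the magnitude of the first transfer's
    // offset from the base register; the 'up' flag passed to each memory
    // micro-op below selects whether that offset is added to or
    // subtracted from the base.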

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    if (copy_base)
        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);

    unsigned reg = 0;
    while (mem_ops != 0) {
        // Do load operations in pairs if possible
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
            // 64-bit memory operation
            // Find 2 set register bits (clear them after finding)
            unsigned reg_idx1;
            unsigned reg_idx2;

            // Find the first register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Find the second register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Load into temp reg if necessary
            if (reg_idx2 == INTREG_PC && pc_temp)
                reg_idx2 = INTREG_UREG1;

            // Actually load both registers from memory
            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
                    copy_base ? INTREG_UREG0 : rn, up, addr);

            if (!writeback && reg_idx2 == INTREG_PC) {
                // No writeback if idx == pc; set the appropriate flags
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);

                if (!(condCode == COND_AL || condCode == COND_UC))
                    (*uop)->setFlag(StaticInst::IsCondControl);
                else
                    (*uop)->setFlag(StaticInst::IsUncondControl);
            }

            if (up) addr += 8;
            else addr -= 8;
            mem_ops -= 2;
        } else {
            // 32-bit memory operation
            // Find register for operation
            unsigned reg_idx;
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;

            if (load) {
                if (writeback && reg_idx == INTREG_PC) {
                    // If this instruction changes the PC and performs a
                    // writeback, ensure the pc load/branch is the last uop.
                    // Load into a temp reg here.
                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else if (reg_idx == INTREG_PC && exception_ret) {
                    // Special handling for exception return
                    *uop = new MicroLdrRetUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else {
                    // Standard single load uop
                    *uop = new MicroLdrUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                }

                // Loading pc as last operation? Set appropriate flags.
                if (!writeback && reg_idx == INTREG_PC) {
                    (*uop)->setFlag(StaticInst::IsControl);
                    (*uop)->setFlag(StaticInst::IsIndirectControl);

                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                }
            } else {
                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
            }

            if (up) addr += 4;
            else addr -= 4;
            --mem_ops;
        }

        // Load/store micro-op generated, go to next uop
        ++uop;
    }

    if (writeback && ones) {
        // Perform writeback uop operation
        if (up)
            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // Write PC after address writeback?
        if (pc_temp) {
            if (exception_ret) {
                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            } else {
                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
            }
            (*uop)->setFlag(StaticInst::IsControl);
            (*uop)->setFlag(StaticInst::IsIndirectControl);

            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);

            if (rn == INTREG_SP)
                (*uop)->setFlag(StaticInst::IsReturn);

            ++uop;
        }
    }

    --uop;
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isUncondCtrl())
        setFlag(StaticInst::IsUncondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
        (*uop)->setDelayedCommit();
    }
}

PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    bool post = (mode == AddrMd_PostIndex);
    bool writeback = (mode != AddrMd_Offset);

    if (load) {
        // Use integer rounding to round up loads of size 4
        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
    } else {
        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
    }
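    // For example, a pair of 16-byte registers takes (16 + 4) / 8 == 2
    // load micro-ops, while 8-byte and 4-byte pairs collapse into a
    // single paired-load micro-op: (8 + 4) / 8 == (4 + 4) / 8 == 1.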
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    rn = makeSP(rn);

    if (!post) {
        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
                post ? 0 : imm);
    }

    if (fp) {
        if (size == 16) {
            if (load) {
                *uop++ = new MicroLdFp16Uop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroLdFp16Uop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            }
        } else if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *uop++ = new MicroStrXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, size, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                if (signExt) {
                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                } else {
                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                }
            } else {
                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    }

    if (writeback) {
        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
                                   post ? imm : 0);
    }

    assert(uop == &microOps[numMicroops]);
    (*--uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, IntRegIndex dest,
                               IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, IntRegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16RegUop(machInst, dest, base,
                                     offset, type, imm);
    } else {
        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
                                       offset, type, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
                                         offset, type, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 1;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
    microOps[0]->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];
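    // Example (assuming a four-register, four-element access such as
    // VLD4 with writeback): two 16-byte load micro-ops, one deinterleave
    // micro-op (regs / elems == 1), and one writeback micro-op.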

    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to pack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
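    // For example, totNumBytes == 24 gives one full 16-byte memory
    // micro-op plus a second micro-op for the 8-byte residuum.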

    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1:
            microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
          case 2:
            microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
          case 3:
            microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
          case 4:
            microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                    machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
          default:
            panic("Invalid number of registers");
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1:
            microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 1, i /* step */);
            break;
          case 2:
            microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 2, i /* step */);
            break;
          case 3:
            microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 3, i /* step */);
            break;
          case 4:
            microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                    machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                    numStructElems, 4, i /* step */);
            break;
          default:
            panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
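    // Note: the constructor arguments shadow the identically named class
    // members, which stay zero-initialized above; the code below uses the
    // arguments, not the members.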
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
                machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
                machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
            TLB::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
                machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
                baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
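    // e.g. a double-precision transfer with offset == 6 moves count == 3
    // registers using 3 * 2 == 6 transfer micro-ops, plus one more if
    // writeback is enabled.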
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so if we
            // hit zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ccprintf(ss, ", ");
    printReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}

std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    return ss.str();
}

std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    printReg(ss, urc);
    return ss.str();
}

std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printReg(ss, ura + FP_Reg_Base);
    else
        printReg(ss, ura);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

std::string
MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, dest);
    ss << ",";
    printReg(ss, dest2);
    ss << ", [";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}
47#include "arch/arm/generated/decoder.hh"
48#include "arch/arm/insts/neon64_mem.hh"
49
50using namespace std;
51using namespace ArmISAInst;
52
53namespace ArmISA
54{
55
56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
57 OpClass __opClass, IntRegIndex rn,
58 bool index, bool up, bool user, bool writeback,
59 bool load, uint32_t reglist) :
60 PredMacroOp(mnem, machInst, __opClass)
61{
62 uint32_t regs = reglist;
63 uint32_t ones = number_of_ones(reglist);
64 uint32_t mem_ops = ones;
65
66 // Copy the base address register if we overwrite it, or if this instruction
67 // is basically a no-op (we have to do something)
68 bool copy_base = (bits(reglist, rn) && load) || !ones;
69 bool force_user = user & !bits(reglist, 15);
70 bool exception_ret = user & bits(reglist, 15);
71 bool pc_temp = load && writeback && bits(reglist, 15);
72
73 if (!ones) {
74 numMicroops = 1;
75 } else if (load) {
76 numMicroops = ((ones + 1) / 2)
77 + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
78 + (copy_base ? 1 : 0)
79 + (writeback? 1 : 0)
80 + (pc_temp ? 1 : 0);
81 } else {
82 numMicroops = ones + (writeback ? 1 : 0);
83 }
84
85 microOps = new StaticInstPtr[numMicroops];
86
87 uint32_t addr = 0;
88
89 if (!up)
90 addr = (ones << 2) - 4;
91
92 if (!index)
93 addr += 4;
94
95 StaticInstPtr *uop = microOps;
96
97 // Add 0 to Rn and stick it in ureg0.
98 // This is equivalent to a move.
99 if (copy_base)
100 *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
101
102 unsigned reg = 0;
103 while (mem_ops != 0) {
104 // Do load operations in pairs if possible
105 if (load && mem_ops >= 2 &&
106 !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
107 // 64-bit memory operation
108 // Find 2 set register bits (clear them after finding)
109 unsigned reg_idx1;
110 unsigned reg_idx2;
111
112 // Find the first register
113 while (!bits(regs, reg)) reg++;
114 replaceBits(regs, reg, 0);
115 reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
116
117 // Find the second register
118 while (!bits(regs, reg)) reg++;
119 replaceBits(regs, reg, 0);
120 reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
121
122 // Load into temp reg if necessary
123 if (reg_idx2 == INTREG_PC && pc_temp)
124 reg_idx2 = INTREG_UREG1;
125
126 // Actually load both registers from memory
127 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
128 copy_base ? INTREG_UREG0 : rn, up, addr);
129
130 if (!writeback && reg_idx2 == INTREG_PC) {
131 // No writeback if idx==pc, set appropriate flags
132 (*uop)->setFlag(StaticInst::IsControl);
133 (*uop)->setFlag(StaticInst::IsIndirectControl);
134
135 if (!(condCode == COND_AL || condCode == COND_UC))
136 (*uop)->setFlag(StaticInst::IsCondControl);
137 else
138 (*uop)->setFlag(StaticInst::IsUncondControl);
139 }
140
141 if (up) addr += 8;
142 else addr -= 8;
143 mem_ops -= 2;
144 } else {
145 // 32-bit memory operation
146 // Find register for operation
147 unsigned reg_idx;
148 while (!bits(regs, reg)) reg++;
149 replaceBits(regs, reg, 0);
150 reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
151
152 if (load) {
153 if (writeback && reg_idx == INTREG_PC) {
154 // If this instruction changes the PC and performs a
155 // writeback, ensure the pc load/branch is the last uop.
156 // Load into a temp reg here.
157 *uop = new MicroLdrUop(machInst, INTREG_UREG1,
158 copy_base ? INTREG_UREG0 : rn, up, addr);
159 } else if (reg_idx == INTREG_PC && exception_ret) {
160 // Special handling for exception return
161 *uop = new MicroLdrRetUop(machInst, reg_idx,
162 copy_base ? INTREG_UREG0 : rn, up, addr);
163 } else {
164 // standard single load uop
165 *uop = new MicroLdrUop(machInst, reg_idx,
166 copy_base ? INTREG_UREG0 : rn, up, addr);
167 }
168
169 // Loading pc as last operation? Set appropriate flags.
170 if (!writeback && reg_idx == INTREG_PC) {
171 (*uop)->setFlag(StaticInst::IsControl);
172 (*uop)->setFlag(StaticInst::IsIndirectControl);
173
174 if (!(condCode == COND_AL || condCode == COND_UC))
175 (*uop)->setFlag(StaticInst::IsCondControl);
176 else
177 (*uop)->setFlag(StaticInst::IsUncondControl);
178 }
179 } else {
180 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
181 }
182
183 if (up) addr += 4;
184 else addr -= 4;
185 --mem_ops;
186 }
187
188 // Load/store micro-op generated, go to next uop
189 ++uop;
190 }
191
192 if (writeback && ones) {
193 // Perform writeback uop operation
194 if (up)
195 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
196 else
197 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
198
199 // If the PC was loaded into a temp, write it after the address writeback.
200 if (pc_temp) {
201 if (exception_ret) {
202 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
203 } else {
204 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
205 }
206 (*uop)->setFlag(StaticInst::IsControl);
207 (*uop)->setFlag(StaticInst::IsIndirectControl);
208
209 if (!(condCode == COND_AL || condCode == COND_UC))
210 (*uop)->setFlag(StaticInst::IsCondControl);
211 else
212 (*uop)->setFlag(StaticInst::IsUncondControl);
213
214 if (rn == INTREG_SP)
215 (*uop)->setFlag(StaticInst::IsReturn);
216
217 ++uop;
218 }
219 }
220
221 --uop;
222 (*uop)->setLastMicroop();
223 microOps[0]->setFirstMicroop();
224
225 /* Take the control flags from the last microop for the macroop */
226 if ((*uop)->isControl())
227 setFlag(StaticInst::IsControl);
228 if ((*uop)->isCondCtrl())
229 setFlag(StaticInst::IsCondControl);
230 if ((*uop)->isUncondCtrl())
231 setFlag(StaticInst::IsUncondControl);
232 if ((*uop)->isIndirectCtrl())
233 setFlag(StaticInst::IsIndirectControl);
234 if ((*uop)->isReturn())
235 setFlag(StaticInst::IsReturn);
236
237 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
238 (*uop)->setDelayedCommit();
239 }
240}
241
242PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
243 uint32_t size, bool fp, bool load, bool noAlloc,
244 bool signExt, bool exclusive, bool acrel,
245 int64_t imm, AddrMode mode,
246 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
247 PredMacroOp(mnem, machInst, __opClass)
248{
249 bool post = (mode == AddrMd_PostIndex);
250 bool writeback = (mode != AddrMd_Offset);
251
252 if (load) {
253 // Integer division rounds up: size-4 and size-8 pairs take one load uop, size-16 pairs take two
254 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
255 } else {
256 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
257 }
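// E.g. a 128-bit (size == 16) FP store pair expands to four store uops
// (bottom and top halves of each register) plus the optional address and
// writeback uops.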
258 microOps = new StaticInstPtr[numMicroops];
259
260 StaticInstPtr *uop = microOps;
261
262 rn = makeSP(rn);
263
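// For offset and pre-indexed forms, compute the (SP-aligned) transfer
// address into ureg0 up front; post-indexed forms access memory at rn
// directly and update it afterwards.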
264 if (!post) {
265 *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
266 post ? 0 : imm);
267 }
268
269 if (fp) {
270 if (size == 16) {
271 if (load) {
272 *uop++ = new MicroLdFp16Uop(machInst, rt,
273 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
274 *uop++ = new MicroLdFp16Uop(machInst, rt2,
275 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
276 } else {
277 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
278 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
280 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
281 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
282 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
284 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
285 }
286 } else if (size == 8) {
287 if (load) {
288 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
289 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
290 } else {
291 *uop++ = new MicroStrFpXImmUop(machInst, rt,
292 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
293 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
294 post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
295 }
296 } else if (size == 4) {
297 if (load) {
298 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
299 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
300 } else {
301 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
302 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
303 }
304 }
305 } else {
306 if (size == 8) {
307 if (load) {
308 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
309 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
310 } else {
311 *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
312 0, noAlloc, exclusive, acrel);
313 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
314 size, noAlloc, exclusive, acrel);
315 }
316 } else if (size == 4) {
317 if (load) {
318 if (signExt) {
319 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
320 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
321 } else {
322 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
323 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
324 }
325 } else {
326 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
327 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
328 }
329 }
330 }
331
332 if (writeback) {
333 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
334 post ? imm : 0);
335 }
336
337 assert(uop == &microOps[numMicroops]);
338 (*--uop)->setLastMicroop();
339 microOps[0]->setFirstMicroop();
340
341 for (StaticInstPtr *curUop = microOps;
342 !(*curUop)->isLastMicroop(); curUop++) {
343 (*curUop)->setDelayedCommit();
344 }
345}
346
347BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
348 OpClass __opClass, bool load, IntRegIndex dest,
349 IntRegIndex base, int64_t imm) :
350 PredMacroOp(mnem, machInst, __opClass)
351{
352 numMicroops = load ? 1 : 2;
353 microOps = new StaticInstPtr[numMicroops];
354
355 StaticInstPtr *uop = microOps;
356
357 if (load) {
358 *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
359 } else {
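// Split the 128-bit store into bottom (QB) and top (QT) half uops.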
360 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
361 (*uop)->setDelayedCommit();
362 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
363 }
364 (*uop)->setLastMicroop();
365 microOps[0]->setFirstMicroop();
366}
367
368BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
369 OpClass __opClass, bool load, IntRegIndex dest,
370 IntRegIndex base, int64_t imm) :
371 PredMacroOp(mnem, machInst, __opClass)
372{
373 numMicroops = load ? 2 : 3;
374 microOps = new StaticInstPtr[numMicroops];
375
376 StaticInstPtr *uop = microOps;
377
378 if (load) {
379 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
380 } else {
381 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
382 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
383 }
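// Post-indexed: the accesses above used the unmodified base; now advance
// it by the immediate.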
384 *uop = new MicroAddXiUop(machInst, base, base, imm);
385 (*uop)->setLastMicroop();
386 microOps[0]->setFirstMicroop();
387
388 for (StaticInstPtr *curUop = microOps;
389 !(*curUop)->isLastMicroop(); curUop++) {
390 (*curUop)->setDelayedCommit();
391 }
392}
393
394BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
395 OpClass __opClass, bool load, IntRegIndex dest,
396 IntRegIndex base, int64_t imm) :
397 PredMacroOp(mnem, machInst, __opClass)
398{
399 numMicroops = load ? 2 : 3;
400 microOps = new StaticInstPtr[numMicroops];
401
402 StaticInstPtr *uop = microOps;
403
404 if (load) {
405 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
406 } else {
407 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
408 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
409 }
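// Pre-indexed: the accesses above already used base + imm; write the same
// updated address back to the base register.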
410 *uop = new MicroAddXiUop(machInst, base, base, imm);
411 (*uop)->setLastMicroop();
412 microOps[0]->setFirstMicroop();
413
414 for (StaticInstPtr *curUop = microOps;
415 !(*curUop)->isLastMicroop(); curUop++) {
416 (*curUop)->setDelayedCommit();
417 }
418}
419
420BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
421 OpClass __opClass, bool load, IntRegIndex dest,
422 IntRegIndex base, IntRegIndex offset,
423 ArmExtendType type, int64_t imm) :
424 PredMacroOp(mnem, machInst, __opClass)
425{
426 numMicroops = load ? 1 : 2;
427 microOps = new StaticInstPtr[numMicroops];
428
429 StaticInstPtr *uop = microOps;
430
431 if (load) {
432 *uop = new MicroLdFp16RegUop(machInst, dest, base,
433 offset, type, imm);
434 } else {
435 *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
436 offset, type, imm);
437 (*uop)->setDelayedCommit();
438 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
439 offset, type, imm);
440 }
441
442 (*uop)->setLastMicroop();
443 microOps[0]->setFirstMicroop();
444}
445
446BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
447 OpClass __opClass, IntRegIndex dest,
448 int64_t imm) :
449 PredMacroOp(mnem, machInst, __opClass)
450{
451 numMicroops = 1;
452 microOps = new StaticInstPtr[numMicroops];
453
454 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
455 microOps[0]->setLastMicroop();
456 microOps[0]->setFirstMicroop();
457}
458
459VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
460 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
461 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
462 PredMacroOp(mnem, machInst, __opClass)
463{
464 assert(regs > 0 && regs <= 4);
465 assert(regs % elems == 0);
466
467 numMicroops = (regs > 2) ? 2 : 1;
468 bool wb = (rm != 15);
469 bool deinterleave = (elems > 1);
470
471 if (wb) numMicroops++;
472 if (deinterleave) numMicroops += (regs / elems);
473 microOps = new StaticInstPtr[numMicroops];
474
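// When deinterleaving is needed, load into scratch registers above the
// architected file first; otherwise load straight into the destination.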
475 RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
476
477 uint32_t noAlign = TLB::MustBeOne;
478
479 unsigned uopIdx = 0;
480 switch (regs) {
481 case 4:
482 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483 size, machInst, rMid, rn, 0, align);
484 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
485 size, machInst, rMid + 4, rn, 16, noAlign);
486 break;
487 case 3:
488 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
489 size, machInst, rMid, rn, 0, align);
490 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
491 size, machInst, rMid + 4, rn, 16, noAlign);
492 break;
493 case 2:
494 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
495 size, machInst, rMid, rn, 0, align);
496 break;
497 case 1:
498 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
499 size, machInst, rMid, rn, 0, align);
500 break;
501 default:
502 // Unknown number of registers
503 microOps[uopIdx++] = new Unknown(machInst);
504 }
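// Writeback amount: rm == 13 encodes an immediate update by the amount
// transferred; any other rm (15 means no writeback at all) adds that
// register to the base.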
505 if (wb) {
506 if (rm != 15 && rm != 13) {
507 microOps[uopIdx++] =
508 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
509 } else {
510 microOps[uopIdx++] =
511 new MicroAddiUop(machInst, rn, rn, regs * 8);
512 }
513 }
514 if (deinterleave) {
515 switch (elems) {
516 case 4:
517 assert(regs == 4);
518 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
519 size, machInst, vd * 2, rMid, inc * 2);
520 break;
521 case 3:
522 assert(regs == 3);
523 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
524 size, machInst, vd * 2, rMid, inc * 2);
525 break;
526 case 2:
527 assert(regs == 4 || regs == 2);
528 if (regs == 4) {
529 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530 size, machInst, vd * 2, rMid, inc * 2);
531 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
532 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
533 } else {
534 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
535 size, machInst, vd * 2, rMid, inc * 2);
536 }
537 break;
538 default:
539 // Bad number of elements to deinterleave
540 microOps[uopIdx++] = new Unknown(machInst);
541 }
542 }
543 assert(uopIdx == numMicroops);
544
545 for (unsigned i = 0; i < numMicroops - 1; i++) {
546 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
547 assert(uopPtr);
548 uopPtr->setDelayedCommit();
549 }
550 microOps[0]->setFirstMicroop();
551 microOps[numMicroops - 1]->setLastMicroop();
552}
553
554VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
555 OpClass __opClass, bool all, unsigned elems,
556 RegIndex rn, RegIndex vd, unsigned regs,
557 unsigned inc, uint32_t size, uint32_t align,
558 RegIndex rm, unsigned lane) :
559 PredMacroOp(mnem, machInst, __opClass)
560{
561 assert(regs > 0 && regs <= 4);
562 assert(regs % elems == 0);
563
564 unsigned eBytes = (1 << size);
565 unsigned loadSize = eBytes * elems;
566 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
567 sizeof(FloatRegBits);
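// loadRegs is the number of 32-bit registers the load touches; it is
// only referenced by the assert below.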
568
569 assert(loadRegs > 0 && loadRegs <= 4);
570
571 numMicroops = 1;
572 bool wb = (rm != 15);
573
574 if (wb) numMicroops++;
575 numMicroops += (regs / elems);
576 microOps = new StaticInstPtr[numMicroops];
577
578 RegIndex ufp0 = NumFloatV7ArchRegs;
579
580 unsigned uopIdx = 0;
581 switch (loadSize) {
582 case 1:
583 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
584 machInst, ufp0, rn, 0, align);
585 break;
586 case 2:
587 if (eBytes == 2) {
588 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
589 machInst, ufp0, rn, 0, align);
590 } else {
591 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
592 machInst, ufp0, rn, 0, align);
593 }
594 break;
595 case 3:
596 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
597 machInst, ufp0, rn, 0, align);
598 break;
599 case 4:
600 switch (eBytes) {
601 case 1:
602 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
603 machInst, ufp0, rn, 0, align);
604 break;
605 case 2:
606 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
607 machInst, ufp0, rn, 0, align);
608 break;
609 case 4:
610 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
611 machInst, ufp0, rn, 0, align);
612 break;
613 }
614 break;
615 case 6:
616 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
617 machInst, ufp0, rn, 0, align);
618 break;
619 case 8:
620 switch (eBytes) {
621 case 2:
622 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
623 machInst, ufp0, rn, 0, align);
624 break;
625 case 4:
626 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
627 machInst, ufp0, rn, 0, align);
628 break;
629 }
630 break;
631 case 12:
632 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
633 machInst, ufp0, rn, 0, align);
634 break;
635 case 16:
636 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
637 machInst, ufp0, rn, 0, align);
638 break;
639 default:
640 // Unrecognized load size
641 microOps[uopIdx++] = new Unknown(machInst);
642 }
643 if (wb) {
644 if (rm != 15 && rm != 13) {
645 microOps[uopIdx++] =
646 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
647 } else {
648 microOps[uopIdx++] =
649 new MicroAddiUop(machInst, rn, rn, loadSize);
650 }
651 }
652 switch (elems) {
653 case 4:
654 assert(regs == 4);
655 switch (size) {
656 case 0:
657 if (all) {
658 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
659 machInst, vd * 2, ufp0, inc * 2);
660 } else {
661 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
662 machInst, vd * 2, ufp0, inc * 2, lane);
663 }
664 break;
665 case 1:
666 if (all) {
667 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
668 machInst, vd * 2, ufp0, inc * 2);
669 } else {
670 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
671 machInst, vd * 2, ufp0, inc * 2, lane);
672 }
673 break;
674 case 2:
675 if (all) {
676 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
677 machInst, vd * 2, ufp0, inc * 2);
678 } else {
679 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
680 machInst, vd * 2, ufp0, inc * 2, lane);
681 }
682 break;
683 default:
684 // Bad size
685 microOps[uopIdx++] = new Unknown(machInst);
686 break;
687 }
688 break;
689 case 3:
690 assert(regs == 3);
691 switch (size) {
692 case 0:
693 if (all) {
694 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
695 machInst, vd * 2, ufp0, inc * 2);
696 } else {
697 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
698 machInst, vd * 2, ufp0, inc * 2, lane);
699 }
700 break;
701 case 1:
702 if (all) {
703 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
704 machInst, vd * 2, ufp0, inc * 2);
705 } else {
706 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
707 machInst, vd * 2, ufp0, inc * 2, lane);
708 }
709 break;
710 case 2:
711 if (all) {
712 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
713 machInst, vd * 2, ufp0, inc * 2);
714 } else {
715 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
716 machInst, vd * 2, ufp0, inc * 2, lane);
717 }
718 break;
719 default:
720 // Bad size
721 microOps[uopIdx++] = new Unknown(machInst);
722 break;
723 }
724 break;
725 case 2:
726 assert(regs == 2);
727 assert(loadRegs <= 2);
728 switch (size) {
729 case 0:
730 if (all) {
731 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
732 machInst, vd * 2, ufp0, inc * 2);
733 } else {
734 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
735 machInst, vd * 2, ufp0, inc * 2, lane);
736 }
737 break;
738 case 1:
739 if (all) {
740 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
741 machInst, vd * 2, ufp0, inc * 2);
742 } else {
743 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
744 machInst, vd * 2, ufp0, inc * 2, lane);
745 }
746 break;
747 case 2:
748 if (all) {
749 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
750 machInst, vd * 2, ufp0, inc * 2);
751 } else {
752 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
753 machInst, vd * 2, ufp0, inc * 2, lane);
754 }
755 break;
756 default:
757 // Bad size
758 microOps[uopIdx++] = new Unknown(machInst);
759 break;
760 }
761 break;
762 case 1:
763 assert(regs == 1 || (all && regs == 2));
764 assert(loadRegs <= 2);
765 for (unsigned offset = 0; offset < regs; offset++) {
766 switch (size) {
767 case 0:
768 if (all) {
769 microOps[uopIdx++] =
770 new MicroUnpackAllNeon2to2Uop<uint8_t>(
771 machInst, (vd + offset) * 2, ufp0, inc * 2);
772 } else {
773 microOps[uopIdx++] =
774 new MicroUnpackNeon2to2Uop<uint8_t>(
775 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
776 }
777 break;
778 case 1:
779 if (all) {
780 microOps[uopIdx++] =
781 new MicroUnpackAllNeon2to2Uop<uint16_t>(
782 machInst, (vd + offset) * 2, ufp0, inc * 2);
783 } else {
784 microOps[uopIdx++] =
785 new MicroUnpackNeon2to2Uop<uint16_t>(
786 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
787 }
788 break;
789 case 2:
790 if (all) {
791 microOps[uopIdx++] =
792 new MicroUnpackAllNeon2to2Uop<uint32_t>(
793 machInst, (vd + offset) * 2, ufp0, inc * 2);
794 } else {
795 microOps[uopIdx++] =
796 new MicroUnpackNeon2to2Uop<uint32_t>(
797 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
798 }
799 break;
800 default:
801 // Bad size
802 microOps[uopIdx++] = new Unknown(machInst);
803 break;
804 }
805 }
806 break;
807 default:
808 // Bad number of elements to unpack
809 microOps[uopIdx++] = new Unknown(machInst);
810 }
811 assert(uopIdx == numMicroops);
812
813 for (unsigned i = 0; i < numMicroops - 1; i++) {
814 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
815 assert(uopPtr);
816 uopPtr->setDelayedCommit();
817 }
818 microOps[0]->setFirstMicroop();
819 microOps[numMicroops - 1]->setLastMicroop();
820}
821
822VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
823 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
824 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
825 PredMacroOp(mnem, machInst, __opClass)
826{
827 assert(regs > 0 && regs <= 4);
828 assert(regs % elems == 0);
829
830 numMicroops = (regs > 2) ? 2 : 1;
831 bool wb = (rm != 15);
832 bool interleave = (elems > 1);
833
834 if (wb) numMicroops++;
835 if (interleave) numMicroops += (regs / elems);
836 microOps = new StaticInstPtr[numMicroops];
837
838 uint32_t noAlign = TLB::MustBeOne;
839
840 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
841
842 unsigned uopIdx = 0;
843 if (interleave) {
844 switch (elems) {
845 case 4:
846 assert(regs == 4);
847 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
848 size, machInst, rMid, vd * 2, inc * 2);
849 break;
850 case 3:
851 assert(regs == 3);
852 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
853 size, machInst, rMid, vd * 2, inc * 2);
854 break;
855 case 2:
856 assert(regs == 4 || regs == 2);
857 if (regs == 4) {
858 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859 size, machInst, rMid, vd * 2, inc * 2);
860 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
861 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
862 } else {
863 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
864 size, machInst, rMid, vd * 2, inc * 2);
865 }
866 break;
867 default:
868 // Bad number of elements to interleave
869 microOps[uopIdx++] = new Unknown(machInst);
870 }
871 }
872 switch (regs) {
873 case 4:
874 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875 size, machInst, rMid, rn, 0, align);
876 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
877 size, machInst, rMid + 4, rn, 16, noAlign);
878 break;
879 case 3:
880 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
881 size, machInst, rMid, rn, 0, align);
882 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
883 size, machInst, rMid + 4, rn, 16, noAlign);
884 break;
885 case 2:
886 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
887 size, machInst, rMid, rn, 0, align);
888 break;
889 case 1:
890 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
891 size, machInst, rMid, rn, 0, align);
892 break;
893 default:
894 // Unknown number of registers
895 microOps[uopIdx++] = new Unknown(machInst);
896 }
897 if (wb) {
898 if (rm != 15 && rm != 13) {
899 microOps[uopIdx++] =
900 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
901 } else {
902 microOps[uopIdx++] =
903 new MicroAddiUop(machInst, rn, rn, regs * 8);
904 }
905 }
906 assert(uopIdx == numMicroops);
907
908 for (unsigned i = 0; i < numMicroops - 1; i++) {
909 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
910 assert(uopPtr);
911 uopPtr->setDelayedCommit();
912 }
913 microOps[0]->setFirstMicroop();
914 microOps[numMicroops - 1]->setLastMicroop();
915}
916
917VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
918 OpClass __opClass, bool all, unsigned elems,
919 RegIndex rn, RegIndex vd, unsigned regs,
920 unsigned inc, uint32_t size, uint32_t align,
921 RegIndex rm, unsigned lane) :
922 PredMacroOp(mnem, machInst, __opClass)
923{
924 assert(!all);
925 assert(regs > 0 && regs <= 4);
926 assert(regs % elems == 0);
927
928 unsigned eBytes = (1 << size);
929 unsigned storeSize = eBytes * elems;
930 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
931 sizeof(FloatRegBits);
932
933 assert(storeRegs > 0 && storeRegs <= 4);
934
935 numMicroops = 1;
936 bool wb = (rm != 15);
937
938 if (wb) numMicroops++;
939 numMicroops += (regs / elems);
940 microOps = new StaticInstPtr[numMicroops];
941
942 RegIndex ufp0 = NumFloatV7ArchRegs;
943
944 unsigned uopIdx = 0;
945 switch (elems) {
946 case 4:
947 assert(regs == 4);
948 switch (size) {
949 case 0:
950 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
951 machInst, ufp0, vd * 2, inc * 2, lane);
952 break;
953 case 1:
954 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
955 machInst, ufp0, vd * 2, inc * 2, lane);
956 break;
957 case 2:
958 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
959 machInst, ufp0, vd * 2, inc * 2, lane);
960 break;
961 default:
962 // Bad size
963 microOps[uopIdx++] = new Unknown(machInst);
964 break;
965 }
966 break;
967 case 3:
968 assert(regs == 3);
969 switch (size) {
970 case 0:
971 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
972 machInst, ufp0, vd * 2, inc * 2, lane);
973 break;
974 case 1:
975 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
976 machInst, ufp0, vd * 2, inc * 2, lane);
977 break;
978 case 2:
979 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
980 machInst, ufp0, vd * 2, inc * 2, lane);
981 break;
982 default:
983 // Bad size
984 microOps[uopIdx++] = new Unknown(machInst);
985 break;
986 }
987 break;
988 case 2:
989 assert(regs == 2);
990 assert(storeRegs <= 2);
991 switch (size) {
992 case 0:
993 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
994 machInst, ufp0, vd * 2, inc * 2, lane);
995 break;
996 case 1:
997 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
998 machInst, ufp0, vd * 2, inc * 2, lane);
999 break;
1000 case 2:
1001 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1002 machInst, ufp0, vd * 2, inc * 2, lane);
1003 break;
1004 default:
1005 // Bad size
1006 microOps[uopIdx++] = new Unknown(machInst);
1007 break;
1008 }
1009 break;
1010 case 1:
1011 assert(regs == 1 || (all && regs == 2));
1012 assert(storeRegs <= 2);
1013 for (unsigned offset = 0; offset < regs; offset++) {
1014 switch (size) {
1015 case 0:
1016 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1017 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1018 break;
1019 case 1:
1020 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1021 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1022 break;
1023 case 2:
1024 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1025 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1026 break;
1027 default:
1028 // Bad size
1029 microOps[uopIdx++] = new Unknown(machInst);
1030 break;
1031 }
1032 }
1033 break;
1034 default:
1035 // Bad number of elements to pack
1036 microOps[uopIdx++] = new Unknown(machInst);
1037 }
1038 switch (storeSize) {
1039 case 1:
1040 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1041 machInst, ufp0, rn, 0, align);
1042 break;
1043 case 2:
1044 if (eBytes == 2) {
1045 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1046 machInst, ufp0, rn, 0, align);
1047 } else {
1048 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1049 machInst, ufp0, rn, 0, align);
1050 }
1051 break;
1052 case 3:
1053 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1054 machInst, ufp0, rn, 0, align);
1055 break;
1056 case 4:
1057 switch (eBytes) {
1058 case 1:
1059 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1060 machInst, ufp0, rn, 0, align);
1061 break;
1062 case 2:
1063 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1064 machInst, ufp0, rn, 0, align);
1065 break;
1066 case 4:
1067 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1068 machInst, ufp0, rn, 0, align);
1069 break;
1070 }
1071 break;
1072 case 6:
1073 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1074 machInst, ufp0, rn, 0, align);
1075 break;
1076 case 8:
1077 switch (eBytes) {
1078 case 2:
1079 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1080 machInst, ufp0, rn, 0, align);
1081 break;
1082 case 4:
1083 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1084 machInst, ufp0, rn, 0, align);
1085 break;
1086 }
1087 break;
1088 case 12:
1089 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1090 machInst, ufp0, rn, 0, align);
1091 break;
1092 case 16:
1093 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1094 machInst, ufp0, rn, 0, align);
1095 break;
1096 default:
1097 // Bad store size
1098 microOps[uopIdx++] = new Unknown(machInst);
1099 }
1100 if (wb) {
1101 if (rm != 15 && rm != 13) {
1102 microOps[uopIdx++] =
1103 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1104 } else {
1105 microOps[uopIdx++] =
1106 new MicroAddiUop(machInst, rn, rn, storeSize);
1107 }
1108 }
1109 assert(uopIdx == numMicroops);
1110
1111 for (unsigned i = 0; i < numMicroops - 1; i++) {
1112 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1113 assert(uopPtr);
1114 uopPtr->setDelayedCommit();
1115 }
1116 microOps[0]->setFirstMicroop();
1117 microOps[numMicroops - 1]->setLastMicroop();
1118}
1119
1120VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1121 OpClass __opClass, RegIndex rn, RegIndex vd,
1122 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1123 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1124 PredMacroOp(mnem, machInst, __opClass)
1125{
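// Memory data is staged in the scratch vector registers starting at vx,
// above the architected file, and then deinterleaved into vd.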
1126 RegIndex vx = NumFloatV8ArchRegs / 4;
1127 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1128 bool baseIsSP = isSP((IntRegIndex) rnsp);
1129
1130 numMicroops = wb ? 1 : 0;
1131
1132 int totNumBytes = numRegs * dataSize / 8;
1133 assert(totNumBytes <= 64);
1134
1135 // The guiding principle here is that no more than 16 bytes can be
1136 // transferred at a time
1137 int numMemMicroops = totNumBytes / 16;
1138 int residuum = totNumBytes % 16;
1139 if (residuum)
1140 ++numMemMicroops;
1141 numMicroops += numMemMicroops;
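// E.g. a 24-byte transfer becomes one 16-byte access plus one 8-byte
// residuum access.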
1142
1143 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1144 numMicroops += numMarshalMicroops;
1145
1146 microOps = new StaticInstPtr[numMicroops];
1147 unsigned uopIdx = 0;
1148 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1149 TLB::AllowUnaligned;
1150
1151 int i = 0;
1152 for (; i < numMemMicroops - 1; ++i) {
1153 microOps[uopIdx++] = new MicroNeonLoad64(
1154 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1155 baseIsSP, 16 /* accSize */, eSize);
1156 }
1157 microOps[uopIdx++] = new MicroNeonLoad64(
1158 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1159 residuum ? residuum : 16 /* accSize */, eSize);
1160
1161 // Writeback microop: the post-increment amount is encoded in "Rm" as
1162 // either a 64-bit general register or '11111', meaning an immediate
1163 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1164 if (wb) {
1165 if (rm != ((RegIndex) INTREG_X31)) {
1166 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1167 UXTX, 0);
1168 } else {
1169 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1170 totNumBytes);
1171 }
1172 }
1173
1174 for (int i = 0; i < numMarshalMicroops; ++i) {
1175 switch(numRegs) {
1176 case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1177 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1178 numStructElems, 1, i /* step */);
1179 break;
1180 case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1181 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1182 numStructElems, 2, i /* step */);
1183 break;
1184 case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1185 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1186 numStructElems, 3, i /* step */);
1187 break;
1188 case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1189 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1190 numStructElems, 4, i /* step */);
1191 break;
1192 default: panic("Invalid number of registers");
1193 }
1194
1195 }
1196
1197 assert(uopIdx == numMicroops);
1198
1199 for (int i = 0; i < numMicroops - 1; ++i) {
1200 microOps[i]->setDelayedCommit();
1201 }
1202 microOps[numMicroops - 1]->setLastMicroop();
1203}
1204
1205VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1206 OpClass __opClass, RegIndex rn, RegIndex vd,
1207 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1208 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1209 PredMacroOp(mnem, machInst, __opClass)
1210{
1211 RegIndex vx = NumFloatV8ArchRegs / 4;
1212 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1213 bool baseIsSP = isSP((IntRegIndex) rnsp);
1214
1215 numMicroops = wb ? 1 : 0;
1216
1217 int totNumBytes = numRegs * dataSize / 8;
1218 assert(totNumBytes <= 64);
1219
1220 // The guiding principle here is that no more than 16 bytes can be
1221 // transferred at a time
1222 int numMemMicroops = totNumBytes / 16;
1223 int residuum = totNumBytes % 16;
1224 if (residuum)
1225 ++numMemMicroops;
1226 numMicroops += numMemMicroops;
1227
1228 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1229 numMicroops += numMarshalMicroops;
1230
1231 microOps = new StaticInstPtr[numMicroops];
1232 unsigned uopIdx = 0;
1233
1234 for (int i = 0; i < numMarshalMicroops; ++i) {
1235 switch (numRegs) {
1236 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1237 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1238 numStructElems, 1, i /* step */);
1239 break;
1240 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1241 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1242 numStructElems, 2, i /* step */);
1243 break;
1244 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1245 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1246 numStructElems, 3, i /* step */);
1247 break;
1248 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1249 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1250 numStructElems, 4, i /* step */);
1251 break;
1252 default: panic("Invalid number of registers");
1253 }
1254 }
1255
1256 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1257 TLB::AllowUnaligned;
1258
1259 int i = 0;
1260 for (; i < numMemMicroops - 1; ++i) {
1261 microOps[uopIdx++] = new MicroNeonStore64(
1262 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1263 baseIsSP, 16 /* accSize */, eSize);
1264 }
1265 microOps[uopIdx++] = new MicroNeonStore64(
1266 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1267 residuum ? residuum : 16 /* accSize */, eSize);
1268
1269 // Writeback microop: the post-increment amount is encoded in "Rm" as
1270 // either a 64-bit general register or '11111', meaning an immediate
1271 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1272 if (wb) {
1273 if (rm != ((RegIndex) INTREG_X31)) {
1274 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1275 UXTX, 0);
1276 } else {
1277 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1278 totNumBytes);
1279 }
1280 }
1281
1282 assert(uopIdx == numMicroops);
1283
1284 for (int i = 0; i < numMicroops - 1; i++) {
1285 microOps[i]->setDelayedCommit();
1286 }
1287 microOps[numMicroops - 1]->setLastMicroop();
1288}
1289
1290VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1291 OpClass __opClass, RegIndex rn, RegIndex vd,
1292 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1293 uint8_t numStructElems, uint8_t index, bool wb,
1294 bool replicate) :
1295 PredMacroOp(mnem, machInst, __opClass),
1296 eSize(0), dataSize(0), numStructElems(0), index(0),
1297 wb(false), replicate(false)
1299{
1300 RegIndex vx = NumFloatV8ArchRegs / 4;
1301 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1302 bool baseIsSP = isSP((IntRegIndex) rnsp);
1303
1304 numMicroops = wb ? 1 : 0;
1305
1306 int eSizeBytes = 1 << eSize;
1307 int totNumBytes = numStructElems * eSizeBytes;
1308 assert(totNumBytes <= 64);
1309
1310 // The guiding principle here is that no more than 16 bytes can be
1311 // transferred at a time
1312 int numMemMicroops = totNumBytes / 16;
1313 int residuum = totNumBytes % 16;
1314 if (residuum)
1315 ++numMemMicroops;
1316 numMicroops += numMemMicroops;
1317
1318 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1319 numMicroops += numMarshalMicroops;
1320
1321 microOps = new StaticInstPtr[numMicroops];
1322 unsigned uopIdx = 0;
1323
1324 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1325 TLB::AllowUnaligned;
1326
1327 int i = 0;
1328 for (; i < numMemMicroops - 1; ++i) {
1329 microOps[uopIdx++] = new MicroNeonLoad64(
1330 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1331 baseIsSP, 16 /* accSize */, eSize);
1332 }
1333 microOps[uopIdx++] = new MicroNeonLoad64(
1334 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1335 residuum ? residuum : 16 /* accSize */, eSize);
1336
1337 // Writeback microop: the post-increment amount is encoded in "Rm" as
1338 // either a 64-bit general register or '11111', meaning an immediate
1339 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1340 if (wb) {
1341 if (rm != ((RegIndex) INTREG_X31)) {
1342 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1343 UXTX, 0);
1344 } else {
1345 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1346 totNumBytes);
1347 }
1348 }
1349
1350 for (int i = 0; i < numMarshalMicroops; ++i) {
1351 microOps[uopIdx++] = new MicroUnpackNeon64(
1352 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1353 numStructElems, index, i /* step */, replicate);
1354 }
1355
1356 assert(uopIdx == numMicroops);
1357
1358 for (int i = 0; i < numMicroops - 1; i++) {
1359 microOps[i]->setDelayedCommit();
1360 }
1361 microOps[numMicroops - 1]->setLastMicroop();
1362}
1363
1364VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1365 OpClass __opClass, RegIndex rn, RegIndex vd,
1366 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1367 uint8_t numStructElems, uint8_t index, bool wb,
1368 bool replicate) :
1369 PredMacroOp(mnem, machInst, __opClass),
1370 eSize(0), dataSize(0), numStructElems(0), index(0),
1371 wb(false), replicate(false)
1372{
1373 RegIndex vx = NumFloatV8ArchRegs / 4;
1374 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1375 bool baseIsSP = isSP((IntRegIndex) rnsp);
1376
1377 numMicroops = wb ? 1 : 0;
1378
1379 int eSizeBytes = 1 << eSize;
1380 int totNumBytes = numStructElems * eSizeBytes;
1381 assert(totNumBytes <= 64);
1382
1383 // The guiding principle here is that no more than 16 bytes can be
1384 // transferred at a time
1385 int numMemMicroops = totNumBytes / 16;
1386 int residuum = totNumBytes % 16;
1387 if (residuum)
1388 ++numMemMicroops;
1389 numMicroops += numMemMicroops;
1390
1391 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1392 numMicroops += numMarshalMicroops;
1393
1394 microOps = new StaticInstPtr[numMicroops];
1395 unsigned uopIdx = 0;
1396
1397 for (int i = 0; i < numMarshalMicroops; ++i) {
1398 microOps[uopIdx++] = new MicroPackNeon64(
1399 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1400 numStructElems, index, i /* step */, replicate);
1401 }
1402
1403 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1404 TLB::AllowUnaligned;
1405
1406 int i = 0;
1407 for (; i < numMemMicroops - 1; ++i) {
1408 microOps[uopIdx++] = new MicroNeonStore64(
1409 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1410 baseIsSP, 16 /* accSize */, eSize);
1411 }
1412 microOps[uopIdx++] = new MicroNeonStore64(
1413 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1414 residuum ? residuum : 16 /* accSize */, eSize);
1415
1416 // Writeback microop: the post-increment amount is encoded in "Rm" as
1417 // either a 64-bit general register or '11111', meaning an immediate
1418 // equal to the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1419 if (wb) {
1420 if (rm != ((RegIndex) INTREG_X31)) {
1421 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1422 UXTX, 0);
1423 } else {
1424 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1425 totNumBytes);
1426 }
1427 }
1428
1429 assert(uopIdx == numMicroops);
1430
1431 for (int i = 0; i < numMicroops - 1; i++) {
1432 microOps[i]->setDelayedCommit();
1433 }
1434 microOps[numMicroops - 1]->setLastMicroop();
1435}
1436
1437MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1438 OpClass __opClass, IntRegIndex rn,
1439 RegIndex vd, bool single, bool up,
1440 bool writeback, bool load, uint32_t offset) :
1441 PredMacroOp(mnem, machInst, __opClass)
1442{
1443 int i = 0;
1444
1445 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446 // to be functionally identical except that fldmx is deprecated. For now
1447 // we'll assume they're otherwise interchangeable.
1448 int count = (single ? offset : (offset / 2));
1449 if (count == 0 || count > NumFloatV7ArchRegs)
1450 warn_once("Bad offset field for VFP load/store multiple.\n");
1451 if (count == 0) {
1452 // Force there to be at least one microop so the macroop makes sense.
1453 writeback = true;
1454 }
1455 if (count > NumFloatV7ArchRegs)
1456 count = NumFloatV7ArchRegs;
1457
1458 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1459 microOps = new StaticInstPtr[numMicroops];
1460
1461 int64_t addr = 0;
1462
1463 if (!up)
1464 addr = 4 * offset;
1465
1466 bool tempUp = up;
1467 for (int j = 0; j < count; j++) {
1468 if (load) {
1469 if (single) {
1470 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1471 tempUp, addr);
1472 } else {
1473 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1474 tempUp, addr);
1475 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1476 addr + (up ? 4 : -4));
1477 }
1478 } else {
1479 if (single) {
1480 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1481 tempUp, addr);
1482 } else {
1483 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1484 tempUp, addr);
1485 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1486 addr + (up ? 4 : -4));
1487 }
1488 }
1489 if (!tempUp) {
1490 addr -= (single ? 4 : 8);
1491 // The microops don't handle negative displacement, so once we
1492 // hit zero, flip the polarity and start adding.
1493 if (addr <= 0) {
1494 tempUp = true;
1495 addr = -addr;
1496 }
1497 } else {
1498 addr += (single ? 4 : 8);
1499 }
1500 }
1501
1502 if (writeback) {
1503 if (up) {
1504 microOps[i++] =
1505 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1506 } else {
1507 microOps[i++] =
1508 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1509 }
1510 }
1511
1512 assert(numMicroops == i);
1513 microOps[numMicroops - 1]->setLastMicroop();
1514
1515 for (StaticInstPtr *curUop = microOps;
1516 !(*curUop)->isLastMicroop(); curUop++) {
1517 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1518 assert(uopPtr);
1519 uopPtr->setDelayedCommit();
1520 }
1521}
1522
1523std::string
1524MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1525{
1526 std::stringstream ss;
1527 printMnemonic(ss);
1528 printReg(ss, ura);
1529 ss << ", ";
1530 printReg(ss, urb);
1531 ss << ", ";
1532 ccprintf(ss, "#%d", imm);
1533 return ss.str();
1534}
1535
1536std::string
1537MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1538{
1539 std::stringstream ss;
1540 printMnemonic(ss);
1541 printReg(ss, ura);
1542 ss << ", ";
1543 printReg(ss, urb);
1544 ss << ", ";
1545 ccprintf(ss, "#%d", imm);
1546 return ss.str();
1547}
1548
1549std::string
1550MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1551{
1552 std::stringstream ss;
1553 printMnemonic(ss);
1554 ss << "[PC,CPSR]";
1555 return ss.str();
1556}
1557
1558std::string
1559MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1560{
1561 std::stringstream ss;
1562 printMnemonic(ss);
1563 printReg(ss, ura);
1564 ccprintf(ss, ", ");
1565 printReg(ss, urb);
1566 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1567 return ss.str();
1568}
1569
1570std::string
1571MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572{
1573 std::stringstream ss;
1574 printMnemonic(ss);
1575 printReg(ss, ura);
1576 ss << ", ";
1577 printReg(ss, urb);
1578 return ss.str();
1579}
1580
1581std::string
1582MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1583{
1584 std::stringstream ss;
1585 printMnemonic(ss);
1586 printReg(ss, ura);
1587 ss << ", ";
1588 printReg(ss, urb);
1589 ss << ", ";
1590 printReg(ss, urc);
1591 return ss.str();
1592}
1593
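// Renders the uop as "<mnemonic> reg, [base, #imm]"; floating point
// destinations are printed from the FP register file.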
1594std::string
1595MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1596{
1597 std::stringstream ss;
1598 printMnemonic(ss);
1599 if (isFloating())
1600 printReg(ss, ura + FP_Reg_Base);
1601 else
1602 printReg(ss, ura);
1603 ss << ", [";
1604 printReg(ss, urb);
1605 ss << ", ";
1606 ccprintf(ss, "#%d", imm);
1607 ss << "]";
1608 return ss.str();
1609}
1610
1611std::string
1612MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1613{
1614 std::stringstream ss;
1615 printMnemonic(ss);
1616 printReg(ss, dest);
1617 ss << ", ";
1618 printReg(ss, dest2);
1619 ss << ", [";
1620 printReg(ss, urb);
1621 ss << ", ";
1622 ccprintf(ss, "#%d", imm);
1623 ss << "]";
1624 return ss.str();
1625}
1626
1627}