macromem.cc revision 10537:47fe87b0cf97
1/* 2 * Copyright (c) 2010-2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2007-2008 The Florida State University 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Stephen Hines 41 */ 42 43#include <sstream> 44 45#include "arch/arm/insts/macromem.hh" 46 47#include "arch/arm/generated/decoder.hh" 48#include "arch/arm/insts/neon64_mem.hh" 49 50using namespace std; 51using namespace ArmISAInst; 52 53namespace ArmISA 54{ 55 56MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, 57 OpClass __opClass, IntRegIndex rn, 58 bool index, bool up, bool user, bool writeback, 59 bool load, uint32_t reglist) : 60 PredMacroOp(mnem, machInst, __opClass) 61{ 62 uint32_t regs = reglist; 63 uint32_t ones = number_of_ones(reglist); 64 uint32_t mem_ops = ones; 65 66 // Copy the base address register if we overwrite it, or if this instruction 67 // is basically a no-op (we have to do something) 68 bool copy_base = (bits(reglist, rn) && load) || !ones; 69 bool force_user = user & !bits(reglist, 15); 70 bool exception_ret = user & bits(reglist, 15); 71 bool pc_temp = load && writeback && bits(reglist, 15); 72 73 if (!ones) { 74 numMicroops = 1; 75 } else if (load) { 76 numMicroops = ((ones + 1) / 2) 77 + ((ones % 2 == 0 && exception_ret) ? 1 : 0) 78 + (copy_base ? 1 : 0) 79 + (writeback? 1 : 0) 80 + (pc_temp ? 1 : 0); 81 } else { 82 numMicroops = ones + (writeback ? 
1 : 0); 83 } 84 85 microOps = new StaticInstPtr[numMicroops]; 86 87 uint32_t addr = 0; 88 89 if (!up) 90 addr = (ones << 2) - 4; 91 92 if (!index) 93 addr += 4; 94 95 StaticInstPtr *uop = microOps; 96 97 // Add 0 to Rn and stick it in ureg0. 98 // This is equivalent to a move. 99 if (copy_base) 100 *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0); 101 102 unsigned reg = 0; 103 while (mem_ops != 0) { 104 // Do load operations in pairs if possible 105 if (load && mem_ops >= 2 && 106 !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) { 107 // 64-bit memory operation 108 // Find 2 set register bits (clear them after finding) 109 unsigned reg_idx1; 110 unsigned reg_idx2; 111 112 // Find the first register 113 while (!bits(regs, reg)) reg++; 114 replaceBits(regs, reg, 0); 115 reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg; 116 117 // Find the second register 118 while (!bits(regs, reg)) reg++; 119 replaceBits(regs, reg, 0); 120 reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg; 121 122 // Load into temp reg if necessary 123 if (reg_idx2 == INTREG_PC && pc_temp) 124 reg_idx2 = INTREG_UREG1; 125 126 // Actually load both registers from memory 127 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2, 128 copy_base ? INTREG_UREG0 : rn, up, addr); 129 130 if (!writeback && reg_idx2 == INTREG_PC) { 131 // No writeback if idx==pc, set appropriate flags 132 (*uop)->setFlag(StaticInst::IsControl); 133 (*uop)->setFlag(StaticInst::IsIndirectControl); 134 135 if (!(condCode == COND_AL || condCode == COND_UC)) 136 (*uop)->setFlag(StaticInst::IsCondControl); 137 else 138 (*uop)->setFlag(StaticInst::IsUncondControl); 139 } 140 141 if (up) addr += 8; 142 else addr -= 8; 143 mem_ops -= 2; 144 } else { 145 // 32-bit memory operation 146 // Find register for operation 147 unsigned reg_idx; 148 while(!bits(regs, reg)) reg++; 149 replaceBits(regs, reg, 0); 150 reg_idx = force_user ? 
intRegInMode(MODE_USER, reg) : reg; 151 152 if (load) { 153 if (writeback && reg_idx == INTREG_PC) { 154 // If this instruction changes the PC and performs a 155 // writeback, ensure the pc load/branch is the last uop. 156 // Load into a temp reg here. 157 *uop = new MicroLdrUop(machInst, INTREG_UREG1, 158 copy_base ? INTREG_UREG0 : rn, up, addr); 159 } else if (reg_idx == INTREG_PC && exception_ret) { 160 // Special handling for exception return 161 *uop = new MicroLdrRetUop(machInst, reg_idx, 162 copy_base ? INTREG_UREG0 : rn, up, addr); 163 } else { 164 // standard single load uop 165 *uop = new MicroLdrUop(machInst, reg_idx, 166 copy_base ? INTREG_UREG0 : rn, up, addr); 167 } 168 169 // Loading pc as last operation? Set appropriate flags. 170 if (!writeback && reg_idx == INTREG_PC) { 171 (*uop)->setFlag(StaticInst::IsControl); 172 (*uop)->setFlag(StaticInst::IsIndirectControl); 173 174 if (!(condCode == COND_AL || condCode == COND_UC)) 175 (*uop)->setFlag(StaticInst::IsCondControl); 176 else 177 (*uop)->setFlag(StaticInst::IsUncondControl); 178 } 179 } else { 180 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr); 181 } 182 183 if (up) addr += 4; 184 else addr -= 4; 185 --mem_ops; 186 } 187 188 // Load/store micro-op generated, go to next uop 189 ++uop; 190 } 191 192 if (writeback && ones) { 193 // Perform writeback uop operation 194 if (up) 195 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4); 196 else 197 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4); 198 199 // Write PC after address writeback? 
200 if (pc_temp) { 201 if (exception_ret) { 202 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1); 203 } else { 204 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1); 205 } 206 (*uop)->setFlag(StaticInst::IsControl); 207 (*uop)->setFlag(StaticInst::IsIndirectControl); 208 209 if (!(condCode == COND_AL || condCode == COND_UC)) 210 (*uop)->setFlag(StaticInst::IsCondControl); 211 else 212 (*uop)->setFlag(StaticInst::IsUncondControl); 213 214 if (rn == INTREG_SP) 215 (*uop)->setFlag(StaticInst::IsReturn); 216 217 ++uop; 218 } 219 } 220 221 --uop; 222 (*uop)->setLastMicroop(); 223 224 /* Take the control flags from the last microop for the macroop */ 225 if ((*uop)->isControl()) 226 setFlag(StaticInst::IsControl); 227 if ((*uop)->isCondCtrl()) 228 setFlag(StaticInst::IsCondControl); 229 if ((*uop)->isUncondCtrl()) 230 setFlag(StaticInst::IsUncondControl); 231 if ((*uop)->isIndirectCtrl()) 232 setFlag(StaticInst::IsIndirectControl); 233 if ((*uop)->isReturn()) 234 setFlag(StaticInst::IsReturn); 235 236 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) { 237 (*uop)->setDelayedCommit(); 238 } 239} 240 241PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 242 uint32_t size, bool fp, bool load, bool noAlloc, 243 bool signExt, bool exclusive, bool acrel, 244 int64_t imm, AddrMode mode, 245 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : 246 PredMacroOp(mnem, machInst, __opClass) 247{ 248 bool post = (mode == AddrMd_PostIndex); 249 bool writeback = (mode != AddrMd_Offset); 250 251 if (load) { 252 // Use integer rounding to round up loads of size 4 253 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0); 254 } else { 255 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 
1 : 0); 256 } 257 microOps = new StaticInstPtr[numMicroops]; 258 259 StaticInstPtr *uop = microOps; 260 261 rn = makeSP(rn); 262 263 if (!post) { 264 *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, 265 post ? 0 : imm); 266 } 267 268 if (fp) { 269 if (size == 16) { 270 if (load) { 271 *uop++ = new MicroLdFp16Uop(machInst, rt, 272 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 273 *uop++ = new MicroLdFp16Uop(machInst, rt2, 274 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); 275 } else { 276 *uop++ = new MicroStrQBFpXImmUop(machInst, rt, 277 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 278 *uop++ = new MicroStrQTFpXImmUop(machInst, rt, 279 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 280 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2, 281 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); 282 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2, 283 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); 284 } 285 } else if (size == 8) { 286 if (load) { 287 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2, 288 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 289 } else { 290 *uop++ = new MicroStrFpXImmUop(machInst, rt, 291 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 292 *uop++ = new MicroStrFpXImmUop(machInst, rt2, 293 post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel); 294 } 295 } else if (size == 4) { 296 if (load) { 297 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2, 298 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 299 } else { 300 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2, 301 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 302 } 303 } 304 } else { 305 if (size == 8) { 306 if (load) { 307 *uop++ = new MicroLdPairUop(machInst, rt, rt2, 308 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 309 } else { 310 *uop++ = new MicroStrXImmUop(machInst, rt, post ? 
rn : INTREG_UREG0, 311 0, noAlloc, exclusive, acrel); 312 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0, 313 size, noAlloc, exclusive, acrel); 314 } 315 } else if (size == 4) { 316 if (load) { 317 if (signExt) { 318 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2, 319 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 320 } else { 321 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2, 322 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 323 } 324 } else { 325 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2, 326 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); 327 } 328 } 329 } 330 331 if (writeback) { 332 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0, 333 post ? imm : 0); 334 } 335 336 assert(uop == µOps[numMicroops]); 337 (*--uop)->setLastMicroop(); 338 339 for (StaticInstPtr *curUop = microOps; 340 !(*curUop)->isLastMicroop(); curUop++) { 341 (*curUop)->setDelayedCommit(); 342 } 343} 344 345BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, 346 OpClass __opClass, bool load, IntRegIndex dest, 347 IntRegIndex base, int64_t imm) : 348 PredMacroOp(mnem, machInst, __opClass) 349{ 350 numMicroops = load ? 1 : 2; 351 microOps = new StaticInstPtr[numMicroops]; 352 353 StaticInstPtr *uop = microOps; 354 355 if (load) { 356 *uop = new MicroLdFp16Uop(machInst, dest, base, imm); 357 } else { 358 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 359 (*uop)->setDelayedCommit(); 360 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 361 } 362 (*uop)->setLastMicroop(); 363} 364 365BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, 366 OpClass __opClass, bool load, IntRegIndex dest, 367 IntRegIndex base, int64_t imm) : 368 PredMacroOp(mnem, machInst, __opClass) 369{ 370 numMicroops = load ? 
2 : 3; 371 microOps = new StaticInstPtr[numMicroops]; 372 373 StaticInstPtr *uop = microOps; 374 375 if (load) { 376 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0); 377 } else { 378 *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0); 379 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0); 380 } 381 *uop = new MicroAddXiUop(machInst, base, base, imm); 382 (*uop)->setLastMicroop(); 383 384 for (StaticInstPtr *curUop = microOps; 385 !(*curUop)->isLastMicroop(); curUop++) { 386 (*curUop)->setDelayedCommit(); 387 } 388} 389 390BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, 391 OpClass __opClass, bool load, IntRegIndex dest, 392 IntRegIndex base, int64_t imm) : 393 PredMacroOp(mnem, machInst, __opClass) 394{ 395 numMicroops = load ? 2 : 3; 396 microOps = new StaticInstPtr[numMicroops]; 397 398 StaticInstPtr *uop = microOps; 399 400 if (load) { 401 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm); 402 } else { 403 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 404 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 405 } 406 *uop = new MicroAddXiUop(machInst, base, base, imm); 407 (*uop)->setLastMicroop(); 408 409 for (StaticInstPtr *curUop = microOps; 410 !(*curUop)->isLastMicroop(); curUop++) { 411 (*curUop)->setDelayedCommit(); 412 } 413} 414 415BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, 416 OpClass __opClass, bool load, IntRegIndex dest, 417 IntRegIndex base, IntRegIndex offset, 418 ArmExtendType type, int64_t imm) : 419 PredMacroOp(mnem, machInst, __opClass) 420{ 421 numMicroops = load ? 
1 : 2; 422 microOps = new StaticInstPtr[numMicroops]; 423 424 StaticInstPtr *uop = microOps; 425 426 if (load) { 427 *uop = new MicroLdFp16RegUop(machInst, dest, base, 428 offset, type, imm); 429 } else { 430 *uop = new MicroStrQBFpXRegUop(machInst, dest, base, 431 offset, type, imm); 432 (*uop)->setDelayedCommit(); 433 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base, 434 offset, type, imm); 435 } 436 437 (*uop)->setLastMicroop(); 438} 439 440BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, 441 OpClass __opClass, IntRegIndex dest, 442 int64_t imm) : 443 PredMacroOp(mnem, machInst, __opClass) 444{ 445 numMicroops = 1; 446 microOps = new StaticInstPtr[numMicroops]; 447 448 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm); 449 microOps[0]->setLastMicroop(); 450} 451 452VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 453 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 454 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 455 PredMacroOp(mnem, machInst, __opClass) 456{ 457 assert(regs > 0 && regs <= 4); 458 assert(regs % elems == 0); 459 460 numMicroops = (regs > 2) ? 2 : 1; 461 bool wb = (rm != 15); 462 bool deinterleave = (elems > 1); 463 464 if (wb) numMicroops++; 465 if (deinterleave) numMicroops += (regs / elems); 466 microOps = new StaticInstPtr[numMicroops]; 467 468 RegIndex rMid = deinterleave ? 
NumFloatV7ArchRegs : vd * 2; 469 470 uint32_t noAlign = TLB::MustBeOne; 471 472 unsigned uopIdx = 0; 473 switch (regs) { 474 case 4: 475 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 476 size, machInst, rMid, rn, 0, align); 477 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 478 size, machInst, rMid + 4, rn, 16, noAlign); 479 break; 480 case 3: 481 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 482 size, machInst, rMid, rn, 0, align); 483 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 484 size, machInst, rMid + 4, rn, 16, noAlign); 485 break; 486 case 2: 487 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 488 size, machInst, rMid, rn, 0, align); 489 break; 490 case 1: 491 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 492 size, machInst, rMid, rn, 0, align); 493 break; 494 default: 495 // Unknown number of registers 496 microOps[uopIdx++] = new Unknown(machInst); 497 } 498 if (wb) { 499 if (rm != 15 && rm != 13) { 500 microOps[uopIdx++] = 501 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 502 } else { 503 microOps[uopIdx++] = 504 new MicroAddiUop(machInst, rn, rn, regs * 8); 505 } 506 } 507 if (deinterleave) { 508 switch (elems) { 509 case 4: 510 assert(regs == 4); 511 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>( 512 size, machInst, vd * 2, rMid, inc * 2); 513 break; 514 case 3: 515 assert(regs == 3); 516 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>( 517 size, machInst, vd * 2, rMid, inc * 2); 518 break; 519 case 2: 520 assert(regs == 4 || regs == 2); 521 if (regs == 4) { 522 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 523 size, machInst, vd * 2, rMid, inc * 2); 524 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 525 size, machInst, vd * 2 + 2, rMid + 4, inc * 2); 526 } else { 527 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 528 size, machInst, vd * 2, rMid, inc * 2); 529 } 530 break; 531 default: 532 // Bad number of elements to deinterleave 533 
microOps[uopIdx++] = new Unknown(machInst); 534 } 535 } 536 assert(uopIdx == numMicroops); 537 538 for (unsigned i = 0; i < numMicroops - 1; i++) { 539 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 540 assert(uopPtr); 541 uopPtr->setDelayedCommit(); 542 } 543 microOps[numMicroops - 1]->setLastMicroop(); 544} 545 546VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, 547 OpClass __opClass, bool all, unsigned elems, 548 RegIndex rn, RegIndex vd, unsigned regs, 549 unsigned inc, uint32_t size, uint32_t align, 550 RegIndex rm, unsigned lane) : 551 PredMacroOp(mnem, machInst, __opClass) 552{ 553 assert(regs > 0 && regs <= 4); 554 assert(regs % elems == 0); 555 556 unsigned eBytes = (1 << size); 557 unsigned loadSize = eBytes * elems; 558 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) / 559 sizeof(FloatRegBits); 560 561 assert(loadRegs > 0 && loadRegs <= 4); 562 563 numMicroops = 1; 564 bool wb = (rm != 15); 565 566 if (wb) numMicroops++; 567 numMicroops += (regs / elems); 568 microOps = new StaticInstPtr[numMicroops]; 569 570 RegIndex ufp0 = NumFloatV7ArchRegs; 571 572 unsigned uopIdx = 0; 573 switch (loadSize) { 574 case 1: 575 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>( 576 machInst, ufp0, rn, 0, align); 577 break; 578 case 2: 579 if (eBytes == 2) { 580 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>( 581 machInst, ufp0, rn, 0, align); 582 } else { 583 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>( 584 machInst, ufp0, rn, 0, align); 585 } 586 break; 587 case 3: 588 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>( 589 machInst, ufp0, rn, 0, align); 590 break; 591 case 4: 592 switch (eBytes) { 593 case 1: 594 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>( 595 machInst, ufp0, rn, 0, align); 596 break; 597 case 2: 598 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>( 599 machInst, ufp0, rn, 0, align); 600 break; 601 case 4: 602 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>( 603 
machInst, ufp0, rn, 0, align); 604 break; 605 } 606 break; 607 case 6: 608 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>( 609 machInst, ufp0, rn, 0, align); 610 break; 611 case 8: 612 switch (eBytes) { 613 case 2: 614 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>( 615 machInst, ufp0, rn, 0, align); 616 break; 617 case 4: 618 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>( 619 machInst, ufp0, rn, 0, align); 620 break; 621 } 622 break; 623 case 12: 624 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>( 625 machInst, ufp0, rn, 0, align); 626 break; 627 case 16: 628 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>( 629 machInst, ufp0, rn, 0, align); 630 break; 631 default: 632 // Unrecognized load size 633 microOps[uopIdx++] = new Unknown(machInst); 634 } 635 if (wb) { 636 if (rm != 15 && rm != 13) { 637 microOps[uopIdx++] = 638 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 639 } else { 640 microOps[uopIdx++] = 641 new MicroAddiUop(machInst, rn, rn, loadSize); 642 } 643 } 644 switch (elems) { 645 case 4: 646 assert(regs == 4); 647 switch (size) { 648 case 0: 649 if (all) { 650 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>( 651 machInst, vd * 2, ufp0, inc * 2); 652 } else { 653 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>( 654 machInst, vd * 2, ufp0, inc * 2, lane); 655 } 656 break; 657 case 1: 658 if (all) { 659 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>( 660 machInst, vd * 2, ufp0, inc * 2); 661 } else { 662 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>( 663 machInst, vd * 2, ufp0, inc * 2, lane); 664 } 665 break; 666 case 2: 667 if (all) { 668 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>( 669 machInst, vd * 2, ufp0, inc * 2); 670 } else { 671 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>( 672 machInst, vd * 2, ufp0, inc * 2, lane); 673 } 674 break; 675 default: 676 // Bad size 677 microOps[uopIdx++] = new Unknown(machInst); 678 break; 679 } 680 break; 
681 case 3: 682 assert(regs == 3); 683 switch (size) { 684 case 0: 685 if (all) { 686 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>( 687 machInst, vd * 2, ufp0, inc * 2); 688 } else { 689 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>( 690 machInst, vd * 2, ufp0, inc * 2, lane); 691 } 692 break; 693 case 1: 694 if (all) { 695 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>( 696 machInst, vd * 2, ufp0, inc * 2); 697 } else { 698 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>( 699 machInst, vd * 2, ufp0, inc * 2, lane); 700 } 701 break; 702 case 2: 703 if (all) { 704 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>( 705 machInst, vd * 2, ufp0, inc * 2); 706 } else { 707 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>( 708 machInst, vd * 2, ufp0, inc * 2, lane); 709 } 710 break; 711 default: 712 // Bad size 713 microOps[uopIdx++] = new Unknown(machInst); 714 break; 715 } 716 break; 717 case 2: 718 assert(regs == 2); 719 assert(loadRegs <= 2); 720 switch (size) { 721 case 0: 722 if (all) { 723 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>( 724 machInst, vd * 2, ufp0, inc * 2); 725 } else { 726 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>( 727 machInst, vd * 2, ufp0, inc * 2, lane); 728 } 729 break; 730 case 1: 731 if (all) { 732 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>( 733 machInst, vd * 2, ufp0, inc * 2); 734 } else { 735 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>( 736 machInst, vd * 2, ufp0, inc * 2, lane); 737 } 738 break; 739 case 2: 740 if (all) { 741 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>( 742 machInst, vd * 2, ufp0, inc * 2); 743 } else { 744 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>( 745 machInst, vd * 2, ufp0, inc * 2, lane); 746 } 747 break; 748 default: 749 // Bad size 750 microOps[uopIdx++] = new Unknown(machInst); 751 break; 752 } 753 break; 754 case 1: 755 assert(regs == 1 || (all && regs 
== 2)); 756 assert(loadRegs <= 2); 757 for (unsigned offset = 0; offset < regs; offset++) { 758 switch (size) { 759 case 0: 760 if (all) { 761 microOps[uopIdx++] = 762 new MicroUnpackAllNeon2to2Uop<uint8_t>( 763 machInst, (vd + offset) * 2, ufp0, inc * 2); 764 } else { 765 microOps[uopIdx++] = 766 new MicroUnpackNeon2to2Uop<uint8_t>( 767 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 768 } 769 break; 770 case 1: 771 if (all) { 772 microOps[uopIdx++] = 773 new MicroUnpackAllNeon2to2Uop<uint16_t>( 774 machInst, (vd + offset) * 2, ufp0, inc * 2); 775 } else { 776 microOps[uopIdx++] = 777 new MicroUnpackNeon2to2Uop<uint16_t>( 778 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 779 } 780 break; 781 case 2: 782 if (all) { 783 microOps[uopIdx++] = 784 new MicroUnpackAllNeon2to2Uop<uint32_t>( 785 machInst, (vd + offset) * 2, ufp0, inc * 2); 786 } else { 787 microOps[uopIdx++] = 788 new MicroUnpackNeon2to2Uop<uint32_t>( 789 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 790 } 791 break; 792 default: 793 // Bad size 794 microOps[uopIdx++] = new Unknown(machInst); 795 break; 796 } 797 } 798 break; 799 default: 800 // Bad number of elements to unpack 801 microOps[uopIdx++] = new Unknown(machInst); 802 } 803 assert(uopIdx == numMicroops); 804 805 for (unsigned i = 0; i < numMicroops - 1; i++) { 806 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 807 assert(uopPtr); 808 uopPtr->setDelayedCommit(); 809 } 810 microOps[numMicroops - 1]->setLastMicroop(); 811} 812 813VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 814 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 815 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 816 PredMacroOp(mnem, machInst, __opClass) 817{ 818 assert(regs > 0 && regs <= 4); 819 assert(regs % elems == 0); 820 821 numMicroops = (regs > 2) ? 
2 : 1; 822 bool wb = (rm != 15); 823 bool interleave = (elems > 1); 824 825 if (wb) numMicroops++; 826 if (interleave) numMicroops += (regs / elems); 827 microOps = new StaticInstPtr[numMicroops]; 828 829 uint32_t noAlign = TLB::MustBeOne; 830 831 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; 832 833 unsigned uopIdx = 0; 834 if (interleave) { 835 switch (elems) { 836 case 4: 837 assert(regs == 4); 838 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>( 839 size, machInst, rMid, vd * 2, inc * 2); 840 break; 841 case 3: 842 assert(regs == 3); 843 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>( 844 size, machInst, rMid, vd * 2, inc * 2); 845 break; 846 case 2: 847 assert(regs == 4 || regs == 2); 848 if (regs == 4) { 849 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 850 size, machInst, rMid, vd * 2, inc * 2); 851 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 852 size, machInst, rMid + 4, vd * 2 + 2, inc * 2); 853 } else { 854 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 855 size, machInst, rMid, vd * 2, inc * 2); 856 } 857 break; 858 default: 859 // Bad number of elements to interleave 860 microOps[uopIdx++] = new Unknown(machInst); 861 } 862 } 863 switch (regs) { 864 case 4: 865 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 866 size, machInst, rMid, rn, 0, align); 867 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 868 size, machInst, rMid + 4, rn, 16, noAlign); 869 break; 870 case 3: 871 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 872 size, machInst, rMid, rn, 0, align); 873 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 874 size, machInst, rMid + 4, rn, 16, noAlign); 875 break; 876 case 2: 877 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 878 size, machInst, rMid, rn, 0, align); 879 break; 880 case 1: 881 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 882 size, machInst, rMid, rn, 0, align); 883 break; 884 default: 885 // Unknown number of registers 
886 microOps[uopIdx++] = new Unknown(machInst); 887 } 888 if (wb) { 889 if (rm != 15 && rm != 13) { 890 microOps[uopIdx++] = 891 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 892 } else { 893 microOps[uopIdx++] = 894 new MicroAddiUop(machInst, rn, rn, regs * 8); 895 } 896 } 897 assert(uopIdx == numMicroops); 898 899 for (unsigned i = 0; i < numMicroops - 1; i++) { 900 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 901 assert(uopPtr); 902 uopPtr->setDelayedCommit(); 903 } 904 microOps[numMicroops - 1]->setLastMicroop(); 905} 906 907VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, 908 OpClass __opClass, bool all, unsigned elems, 909 RegIndex rn, RegIndex vd, unsigned regs, 910 unsigned inc, uint32_t size, uint32_t align, 911 RegIndex rm, unsigned lane) : 912 PredMacroOp(mnem, machInst, __opClass) 913{ 914 assert(!all); 915 assert(regs > 0 && regs <= 4); 916 assert(regs % elems == 0); 917 918 unsigned eBytes = (1 << size); 919 unsigned storeSize = eBytes * elems; 920 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) / 921 sizeof(FloatRegBits); 922 923 assert(storeRegs > 0 && storeRegs <= 4); 924 925 numMicroops = 1; 926 bool wb = (rm != 15); 927 928 if (wb) numMicroops++; 929 numMicroops += (regs / elems); 930 microOps = new StaticInstPtr[numMicroops]; 931 932 RegIndex ufp0 = NumFloatV7ArchRegs; 933 934 unsigned uopIdx = 0; 935 switch (elems) { 936 case 4: 937 assert(regs == 4); 938 switch (size) { 939 case 0: 940 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>( 941 machInst, ufp0, vd * 2, inc * 2, lane); 942 break; 943 case 1: 944 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>( 945 machInst, ufp0, vd * 2, inc * 2, lane); 946 break; 947 case 2: 948 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>( 949 machInst, ufp0, vd * 2, inc * 2, lane); 950 break; 951 default: 952 // Bad size 953 microOps[uopIdx++] = new Unknown(machInst); 954 break; 955 } 956 break; 957 case 3: 958 assert(regs 
== 3); 959 switch (size) { 960 case 0: 961 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>( 962 machInst, ufp0, vd * 2, inc * 2, lane); 963 break; 964 case 1: 965 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>( 966 machInst, ufp0, vd * 2, inc * 2, lane); 967 break; 968 case 2: 969 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>( 970 machInst, ufp0, vd * 2, inc * 2, lane); 971 break; 972 default: 973 // Bad size 974 microOps[uopIdx++] = new Unknown(machInst); 975 break; 976 } 977 break; 978 case 2: 979 assert(regs == 2); 980 assert(storeRegs <= 2); 981 switch (size) { 982 case 0: 983 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>( 984 machInst, ufp0, vd * 2, inc * 2, lane); 985 break; 986 case 1: 987 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>( 988 machInst, ufp0, vd * 2, inc * 2, lane); 989 break; 990 case 2: 991 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>( 992 machInst, ufp0, vd * 2, inc * 2, lane); 993 break; 994 default: 995 // Bad size 996 microOps[uopIdx++] = new Unknown(machInst); 997 break; 998 } 999 break; 1000 case 1: 1001 assert(regs == 1 || (all && regs == 2)); 1002 assert(storeRegs <= 2); 1003 for (unsigned offset = 0; offset < regs; offset++) { 1004 switch (size) { 1005 case 0: 1006 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>( 1007 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 1008 break; 1009 case 1: 1010 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>( 1011 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 1012 break; 1013 case 2: 1014 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>( 1015 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 1016 break; 1017 default: 1018 // Bad size 1019 microOps[uopIdx++] = new Unknown(machInst); 1020 break; 1021 } 1022 } 1023 break; 1024 default: 1025 // Bad number of elements to unpack 1026 microOps[uopIdx++] = new Unknown(machInst); 1027 } 1028 switch (storeSize) { 1029 case 1: 1030 microOps[uopIdx++] = new 
MicroStrNeon1Uop<uint8_t>( 1031 machInst, ufp0, rn, 0, align); 1032 break; 1033 case 2: 1034 if (eBytes == 2) { 1035 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>( 1036 machInst, ufp0, rn, 0, align); 1037 } else { 1038 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>( 1039 machInst, ufp0, rn, 0, align); 1040 } 1041 break; 1042 case 3: 1043 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>( 1044 machInst, ufp0, rn, 0, align); 1045 break; 1046 case 4: 1047 switch (eBytes) { 1048 case 1: 1049 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>( 1050 machInst, ufp0, rn, 0, align); 1051 break; 1052 case 2: 1053 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>( 1054 machInst, ufp0, rn, 0, align); 1055 break; 1056 case 4: 1057 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>( 1058 machInst, ufp0, rn, 0, align); 1059 break; 1060 } 1061 break; 1062 case 6: 1063 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>( 1064 machInst, ufp0, rn, 0, align); 1065 break; 1066 case 8: 1067 switch (eBytes) { 1068 case 2: 1069 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>( 1070 machInst, ufp0, rn, 0, align); 1071 break; 1072 case 4: 1073 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>( 1074 machInst, ufp0, rn, 0, align); 1075 break; 1076 } 1077 break; 1078 case 12: 1079 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>( 1080 machInst, ufp0, rn, 0, align); 1081 break; 1082 case 16: 1083 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>( 1084 machInst, ufp0, rn, 0, align); 1085 break; 1086 default: 1087 // Bad store size 1088 microOps[uopIdx++] = new Unknown(machInst); 1089 } 1090 if (wb) { 1091 if (rm != 15 && rm != 13) { 1092 microOps[uopIdx++] = 1093 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 1094 } else { 1095 microOps[uopIdx++] = 1096 new MicroAddiUop(machInst, rn, rn, storeSize); 1097 } 1098 } 1099 assert(uopIdx == numMicroops); 1100 1101 for (unsigned i = 0; i < numMicroops - 1; i++) { 1102 MicroOp * uopPtr = dynamic_cast<MicroOp 
*>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Build the microop sequence for a VldMultOp64 macroop.
//
// Microops are emitted in this order:
//   1. numMemMicroops MicroNeonLoad64 microops, each with an access size of
//      at most 16 bytes, given register vx + i and offset 16 * i;
//   2. if wb is set, one add microop that updates the base register;
//   3. numMarshalMicroops MicroDeintNeon64_<numRegs>Reg microops, given
//      vd + 2 * i and vx.
// Every microop except the last is flagged delayed-commit and the last one
// is flagged as the last microop.
//
// rn              base register; passed through makeSP() before use
// vd              first register handed to the marshalling microops
// rm              post-increment operand; INTREG_X31 selects the immediate
//                 form (totNumBytes), anything else the register form
// eSize           ORed into the memory access flags and forwarded to the
//                 microops
// dataSize        per-register size in bits (numRegs * dataSize / 8 gives
//                 the total number of bytes)
// numStructElems  forwarded unchanged to the marshalling microops
// numRegs         must be 1..4, any other value panics
// wb              whether to emit the base writeback microop
//
// NOTE(review): the last load microop is emitted unconditionally, so
// totNumBytes is presumably never 0 here -- confirm against the decoder.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    // Start the count with the optional writeback microop.
    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshalling microop per pair of registers, rounded up.
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // All but the last memory microop access a full 16 bytes.
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // The last memory microop accesses the remainder, or 16 bytes when the
    // total is a multiple of 16.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Marshalling microops; the loop index is passed as the "step".
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch(numRegs) {
            case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                        machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }

    }

    // Every slot allocated above must have been filled.
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Build the microop sequence for a VstMultOp64 macroop.
//
// Microops are emitted in this order:
//   1. numMarshalMicroops MicroIntNeon64_<numRegs>Reg microops, given
//      vx + 2 * i and vd (two of them when more than 32 bytes are
//      transferred, otherwise one);
//   2. numMemMicroops MicroNeonStore64 microops, each with an access size of
//      at most 16 bytes, given register vx + i and offset 16 * i;
//   3. if wb is set, one add microop that updates the base register.
// Every microop except the last is flagged delayed-commit and the last one
// is flagged as the last microop.
//
// Parameters have the same roles as in VldMultOp64: rn is the base register
// (passed through makeSP()), rm selects the register or immediate
// post-increment, dataSize is in bits per register, numRegs must be 1..4
// (any other value panics) and wb enables the writeback microop.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    // Start the count with the optional writeback microop.
    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Marshalling microops come first for stores; the loop index is passed
    // as the "step".
    for(int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
            case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 1, i /* step */);
                    break;
            case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 2, i /* step */);
                    break;
            case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 3, i /* step */);
                    break;
            case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                        machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                        numStructElems, 4, i /* step */);
                    break;
            default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // All but the last memory microop access a full 16 bytes.
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // The last memory microop accesses the remainder, or 16 bytes when the
    // total is a multiple of 16.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Every slot allocated above must have been filled.
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Build the microop sequence for a VldSingleOp64 macroop.
//
// Microops are emitted in this order:
//   1. numMemMicroops MicroNeonLoad64 microops, each with an access size of
//      at most 16 bytes, given register vx + i and offset 16 * i;
//   2. if wb is set, one add microop that updates the base register;
//   3. numMarshalMicroops MicroUnpackNeon64 microops (one per pair of
//      structure elements, rounded up), given vd + 2 * i and vx.
// Every microop except the last is flagged delayed-commit and the last one
// is flagged as the last microop.
//
// rn              base register; passed through makeSP() before use
// vd              first register handed to the marshalling microops
// rm              post-increment operand; INTREG_X31 selects the immediate
//                 form (totNumBytes), anything else the register form
// eSize           log2 of the element size in bytes (1 << eSize is used as
//                 the byte count)
// dataSize        forwarded unchanged to the marshalling microops
// numStructElems  elements per structure; with eSize it determines the
//                 total number of bytes transferred
// index, replicate  forwarded unchanged to the marshalling microops
// wb              whether to emit the base writeback microop
//
// NOTE(review): the member initialisers below set eSize, dataSize,
// numStructElems, index, wb and replicate to zero/false instead of to the
// constructor arguments of the same names. Inside the body those names
// refer to the arguments. Confirm that leaving the members zeroed is
// intended.
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)

{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    // Start the count with the optional writeback microop.
    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One marshalling microop per pair of structure elements, rounded up.
    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // All but the last memory microop access a full 16 bytes.
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // The last memory microop accesses the remainder, or 16 bytes when the
    // total is a multiple of 16.
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Marshalling microops; the loop index is passed as the "step".
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    // Every slot allocated above must have been filled.
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Build the microop sequence for a VstSingleOp64 macroop.
//
// Microops are emitted in this order:
//   1. numMarshalMicroops MicroPackNeon64 microops, given vx + 2 * i and vd
//      (two of them when more than 32 bytes are transferred, otherwise
//      one);
//   2. numMemMicroops MicroNeonStore64 microops, each with an access size of
//      at most 16 bytes, given register vx + i and offset 16 * i;
//   3. if wb is set, one add microop that updates the base register.
// Every microop except the last is flagged delayed-commit and the last one
// is flagged as the last microop.
//
// Parameters have the same roles as in VldSingleOp64: rn is the base
// register (passed through makeSP()), rm selects the register or immediate
// post-increment, 1 << eSize is the element size in bytes, and dataSize,
// numStructElems, index and replicate are forwarded to the marshalling
// microops.
//
// NOTE(review): the member initialisers below set eSize, dataSize,
// numStructElems, index, wb and replicate to zero/false instead of to the
// constructor arguments of the same names. Inside the body those names
// refer to the arguments. Confirm that leaving the members zeroed is
// intended.
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    // Start the count with the optional writeback microop.
    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    // Marshalling microops come first for stores; the loop index is passed
    // as the "step".
    for(int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // All but the last memory microop access a full 16 bytes.
    int i = 0;
    for(; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    // The last memory microop accesses the remainder, or 16 bytes when the
    // total is a multiple of 16.
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Every slot allocated above must have been filled.
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// Build the microop sequence for a VFP load/store multiple macroop.
//
// For each of "count" registers the loop emits one microop (single) or a
// DB/DT pair of microops (double), choosing the Ldr or Str flavour from
// "load". An add (up) or subtract (!up) of 4 * offset on rn is appended when
// writeback is set. The last microop is flagged as the last microop and all
// preceding ones as delayed-commit.
//
// rn         base register handed to every microop
// vd         first register; incremented after each microop that uses it
// single     true: one microop per register and "count" equals offset;
//            false: two microops per register and "count" is offset / 2
// up         initial direction of the displacement (see tempUp below)
// writeback  whether to update rn; forced to true when count is 0
// load       selects load microops (true) or store microops (false)
// offset     register-list size field; the writeback amount is 4 * offset
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Index of the next free slot in microOps.
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatV7ArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    // Clamp to the number of registers available.
    if (count > NumFloatV7ArchRegs)
        count = NumFloatV7ArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    // Displacement magnitude handed to the microops; its direction is
    // carried separately in tempUp.
    int64_t addr = 0;

    // When counting down, start at the full size of the transfer.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so if we
            // hit zero, flip polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    // Every slot allocated above must have been filled.
    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    // Flag every microop before the last one as delayed-commit; the walk
    // stops at the microop flagged just above.
    for (StaticInstPtr *curUop = microOps;
            !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

// Disassemble as: mnemonic, ura, ", ", urb, ", #" and the immediate.
// The pc and symtab arguments are not used.
std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassemble as: mnemonic, ura, ", ", urb, ", #" and the immediate.
// The pc and symtab arguments are not used.
std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printReg(ss, ura);
    ss << ", ";
    printReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

// Disassemble as the mnemonic followed by the fixed text "[PC,CPSR]".
// The pc and symtab arguments are not used.
std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

std::string
1548MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1549{ 1550 std::stringstream ss; 1551 printMnemonic(ss); 1552 printReg(ss, ura); 1553 ccprintf(ss, ", "); 1554 printReg(ss, urb); 1555 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1556 return ss.str(); 1557} 1558 1559std::string 1560MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1561{ 1562 std::stringstream ss; 1563 printMnemonic(ss); 1564 printReg(ss, ura); 1565 ss << ", "; 1566 printReg(ss, urb); 1567 return ss.str(); 1568} 1569 1570std::string 1571MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1572{ 1573 std::stringstream ss; 1574 printMnemonic(ss); 1575 printReg(ss, ura); 1576 ss << ", "; 1577 printReg(ss, urb); 1578 ss << ", "; 1579 printReg(ss, urc); 1580 return ss.str(); 1581} 1582 1583std::string 1584MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1585{ 1586 std::stringstream ss; 1587 printMnemonic(ss); 1588 if (isFloating()) 1589 printReg(ss, ura + FP_Reg_Base); 1590 else 1591 printReg(ss, ura); 1592 ss << ", ["; 1593 printReg(ss, urb); 1594 ss << ", "; 1595 ccprintf(ss, "#%d", imm); 1596 ss << "]"; 1597 return ss.str(); 1598} 1599 1600std::string 1601MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1602{ 1603 std::stringstream ss; 1604 printMnemonic(ss); 1605 printReg(ss, dest); 1606 ss << ","; 1607 printReg(ss, dest2); 1608 ss << ", ["; 1609 printReg(ss, urb); 1610 ss << ", "; 1611 ccprintf(ss, "#%d", imm); 1612 ss << "]"; 1613 return ss.str(); 1614} 1615 1616} 1617