// macromem.cc — gem5 ARM macro-memory instructions (revision 10339)
/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Stephen Hines
 */

#include <sstream>

#include "arch/arm/insts/macromem.hh"

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"

using namespace std;
using namespace ArmISAInst;

namespace ArmISA
{

// Macro-op for ARM load/store-multiple (LDM/STM-style) instructions.
// Expansion strategy visible below:
//   1. copy the base register Rn into the scratch register ureg0 (a
//      MicroAddiUop with immediate 0) so later micro-ops address memory
//      independently of any writeback to Rn;
//   2. one load/store micro-op per set bit in 'reglist';
//   3. for writeback, an add/sub of 4*ones to Rn;
//   4. for a writeback *load*, the last value is first loaded into the
//      scratch ureg1 and only moved to its destination after the base
//      update, so (per the comment below) no exception can be taken
//      after Rn has already been modified.
MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    // Remember that writeback adds a uop or two and the temp register adds one
    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;

    // It's technically legal to do a lot of nothing
    if (!ones)
        numMicroops = 1;

    microOps = new StaticInstPtr[numMicroops];
    // Running byte offset from ureg0 for each successive transfer.
    uint32_t addr = 0;

    // Descending addressing: start at the highest word and walk down.
    if (!up)
        addr = (ones << 2) - 4;

    // "Before"/"after" increment-decrement adjustment.
    if (!index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);

    unsigned reg = 0;
    unsigned regIdx = 0;
    // The 'user' flag with PC absent selects the user-mode register file
    // (LDM/STM user form); with PC present it is the exception-return form.
    bool force_user = user & !bits(reglist, 15);
    bool exception_ret = user & bits(reglist, 15);

    for (int i = 0; i < ones; i++) {
        // Find the next register.
        while (!bits(regs, reg))
            reg++;
        replaceBits(regs, reg, 0);

        regIdx = reg;
        if (force_user) {
            regIdx = intRegInMode(MODE_USER, regIdx);
        }

        if (load) {
            if (writeback && i == ones - 1) {
                // If it's a writeback and this is the last register
                // do the load into a temporary register which we'll move
                // into the final one later
                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
                                         up, addr);
            } else {
                // Otherwise just do it normally
                if (reg == INTREG_PC && exception_ret) {
                    // This must be the exception return form of ldm.
                    *++uop = new MicroLdrRetUop(machInst, regIdx,
                                                INTREG_UREG0, up, addr);
                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                } else {
                    *++uop = new MicroLdrUop(machInst, regIdx,
                                             INTREG_UREG0, up, addr);
                    if (reg == INTREG_PC) {
                        // Loading into the PC is a (possibly conditional)
                        // indirect branch.
                        (*uop)->setFlag(StaticInst::IsControl);
                        if (!(condCode == COND_AL || condCode == COND_UC))
                            (*uop)->setFlag(StaticInst::IsCondControl);
                        else
                            (*uop)->setFlag(StaticInst::IsUncondControl);
                        (*uop)->setFlag(StaticInst::IsIndirectControl);
                    }
                }
            }
        } else {
            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
        }

        if (up)
            addr += 4;
        else
            addr -= 4;
    }

    if (writeback && ones) {
        // put the register update after we're done all loading
        if (up)
            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // If this was a load move the last temporary value into place
        // this way we can't take an exception after we update the base
        // register.
        if (load && reg == INTREG_PC && exception_ret) {
            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);
        } else if (load) {
            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
            if (reg == INTREG_PC) {
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsCondControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);
                // This is created as a RAS POP
                if (rn == INTREG_SP)
                    (*uop)->setFlag(StaticInst::IsReturn);

            }
        }
    }

    (*uop)->setLastMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    // Everything but the last micro-op commits with the macro-op.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

// Macro-op for a register-pair load/store (LDP/STP-style; X-register
// micro-ops below suggest the 64-bit form — the decoder is the authority).
// A leading uop computes the effective address into ureg0; the accesses
// are split into at most (size / 4) memory micro-ops, and a trailing uop
// performs the base-register writeback when requested.
PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Offset addressing performs no base-register update.
    bool writeback = (mode != AddrMd_Offset);
    numMicroops = 1 + (size / 4) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    bool post = (mode == AddrMd_PostIndex);

    rn = makeSP(rn);

    // Effective address into ureg0: post-index uses the unmodified base
    // (offset 0), otherwise base + imm.
    *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ?
                                    0 : imm);

    // Memory micro-ops.  16-byte FP transfers are split into a "bottom"
    // (B) and a "top" (T) half per register.
    if (fp) {
        if (size == 16) {
            if (load) {
                *++uop = new MicroLdrQBFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroLdrQTFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroLdrQBFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
                *++uop = new MicroLdrQTFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            } else {
                *++uop = new MicroStrQBFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroStrQTFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroStrQBFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
                *++uop = new MicroStrQTFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 16, noAlloc, exclusive, acrel);
            }
        } else if (size == 8) {
            if (load) {
                *++uop = new MicroLdrFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroLdrFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
            } else {
                *++uop = new MicroStrFpXImmUop(machInst, rt,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                *++uop = new MicroStrFpXImmUop(machInst, rt2,
                        INTREG_UREG0, 8, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            // Both 4-byte FP registers move in one paired micro-op.
            if (load) {
                *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            } else {
                *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0,
                        0, noAlloc, exclusive, acrel);
                *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0,
                        size, noAlloc, exclusive, acrel);
            } else {
                *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0,
                        0, noAlloc, exclusive, acrel);
                *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0,
                        size, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            // Pair of 32-bit integer registers in a single micro-op,
            // sign- or zero-extended as requested.
            if (load) {
                if (signExt) {
                    *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                } else {
                    *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            INTREG_UREG0, 0, noAlloc, exclusive, acrel);
                }
            } else {
                *++uop = new MicroStrDXImmUop(machInst, rt, rt2,
                        INTREG_UREG0, 0, noAlloc, exclusive, acrel);
            }
        }
    }

    // Base writeback: for post-index add imm now; for pre-index ureg0
    // already holds base + imm, so just copy it back (add 0).
    if (writeback) {
        *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0,
                                   post ? imm : 0);
    }

    (*uop)->setLastMicroop();

    // All but the last micro-op commit together with the macro-op.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

// 16-byte FP load/store at [base + imm], split into bottom (B) and
// top (T) half micro-ops.
BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 2;
    microOps = new StaticInstPtr[numMicroops];

    if (load) {
        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
    } else {
        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    microOps[0]->setDelayedCommit();
    microOps[1]->setLastMicroop();
}

// Post-indexed variant: access at [base] (offset 0), then base += imm.
BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, IntRegIndex dest,
                               IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 3;
    microOps = new StaticInstPtr[numMicroops];

    if (load) {
        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0);
        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0);
    } else {
        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }
    // Base update happens after both memory halves.
    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);

    microOps[0]->setDelayedCommit();
    microOps[1]->setDelayedCommit();
    microOps[2]->setLastMicroop();
}

// Pre-indexed variant: access at [base + imm], then base += imm.
BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 3;
    microOps = new StaticInstPtr[numMicroops];

    if (load) {
        microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm);
        microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
    } else {
        microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    microOps[2] = new MicroAddXiUop(machInst, base, base, imm);

    microOps[0]->setDelayedCommit();
    microOps[1]->setDelayedCommit();
    microOps[2]->setLastMicroop();
}

// Register-offset variant: index register with extend type and shift imm.
BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, IntRegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 2;
    microOps = new StaticInstPtr[numMicroops];

    if (load) {
        microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base,
                                              offset, type, imm);
        microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base,
                                              offset, type, imm);
    } else {
        microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base,
                                              offset, type, imm);
        microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base,
                                              offset, type, imm);
    }

    microOps[0]->setDelayedCommit();
    microOps[1]->setLastMicroop();
}

// Literal (PC-relative) 16-byte FP load, again as bottom/top halves.
BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 2;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm);
    microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm);

    microOps[0]->setDelayedCommit();
    microOps[1]->setLastMicroop();
}

// NEON VLDn (multiple structures): one or two wide loads, optionally
// followed by deinterleave micro-ops when elems > 1.  When deinterleaving,
// data is first loaded into scratch FP registers above the architectural
// file (NumFloatV7ArchRegs) and then redistributed into vd.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    // rm encodes writeback: 15 = none, 13 = immediate update, otherwise
    // add register rm (see the wb block below).
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Destination of the memory uops: scratch regs when deinterleaving,
    // otherwise straight into the architectural registers.
    RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;

    // Only the first access checks the instruction's alignment constraint.
    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    // Redistribute the raw loaded data from the scratch registers into
    // the architectural destination registers.
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    // Every path above must have emitted exactly numMicroops uops.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VLDn (single / all lanes): one load of loadSize bytes into a
// scratch register, optional writeback, then unpack micro-ops that either
// replicate to all lanes ("all") or insert into a single lane.
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    // Number of FP registers the loaded bytes occupy; only consumed by
    // the asserts below (hence M5_VAR_USED).
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    // rm == 15 means no writeback (same encoding as VldMultOp).
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // First scratch FP register, just above the architectural file.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    // Pick the memory micro-op by total transfer size and element size.
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the scratch register into the destination lane(s).
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Every path above must have emitted exactly numMicroops uops.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (multiple structures): mirror image of VldMultOp — optional
// interleave micro-ops into scratch registers first, then one or two wide
// stores, then optional base-register writeback.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    // rm encodes writeback: 15 = none, 13 = immediate update, otherwise
    // add register rm.
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Only the first access checks the instruction's alignment constraint.
    uint32_t noAlign = TLB::MustBeOne;

    // Source of the store uops: scratch regs when interleaving first,
    // otherwise the architectural registers directly.
    RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    // Every path above must have emitted exactly numMicroops uops.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// NEON VSTn (single lane): pack the selected lane(s) from the source
// registers into a scratch register, store it, then optionally write
// back the base register.  (The "all lanes" form does not exist for
// stores — hence the assert(!all) below.)
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    // Number of FP registers the stored bytes occupy; only consumed by
    // the asserts below (hence M5_VAR_USED).
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    // rm == 15 means no writeback (same encoding as the other Neon ops).
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch FP register used to assemble the store data.
    RegIndex ufp0 = NumFloatV7ArchRegs;

    unsigned uopIdx = 0;
    // Pack the lane data from the source registers into ufp0.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Store the packed scratch register to memory.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    // Every path above must have emitted exactly numMicroops uops.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 LDn (multiple structures): raw 16-byte loads into scratch
// registers starting at vx, optional writeback, then "marshal"
// (deinterleave) micro-ops that distribute the data into vd.
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // First scratch register used for the raw loaded data.
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    // One deinterleave micro-op per pair of destination registers.
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;

    // Full 16-byte loads, then a final load of the residual bytes (or a
    // full 16 if the total was an exact multiple).
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    // Deinterleave the scratch data into the destination registers, two
    // registers per step.
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }

    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}

// AArch64 STn (multiple structures).  NOTE(review): this constructor is
// truncated at the end of the visible chunk; the remainder lives outside
// this view.
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumFloatV8ArchRegs / 4;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ?
1 : 0; 1151 1152 int totNumBytes = numRegs * dataSize / 8; 1153 assert(totNumBytes <= 64); 1154 1155 // The guiding principle here is that no more than 16 bytes can be 1156 // transferred at a time 1157 int numMemMicroops = totNumBytes / 16; 1158 int residuum = totNumBytes % 16; 1159 if (residuum) 1160 ++numMemMicroops; 1161 numMicroops += numMemMicroops; 1162 1163 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1164 numMicroops += numMarshalMicroops; 1165 1166 microOps = new StaticInstPtr[numMicroops]; 1167 unsigned uopIdx = 0; 1168 1169 for(int i = 0; i < numMarshalMicroops; ++i) { 1170 switch (numRegs) { 1171 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg( 1172 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1173 numStructElems, 1, i /* step */); 1174 break; 1175 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg( 1176 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1177 numStructElems, 2, i /* step */); 1178 break; 1179 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg( 1180 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1181 numStructElems, 3, i /* step */); 1182 break; 1183 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg( 1184 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1185 numStructElems, 4, i /* step */); 1186 break; 1187 default: panic("Invalid number of registers"); 1188 } 1189 } 1190 1191 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1192 TLB::AllowUnaligned; 1193 1194 int i = 0; 1195 for(; i < numMemMicroops - 1; ++i) { 1196 microOps[uopIdx++] = new MicroNeonStore64( 1197 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1198 baseIsSP, 16 /* accSize */, eSize); 1199 } 1200 microOps[uopIdx++] = new MicroNeonStore64( 1201 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1202 residuum ? 
residuum : 16 /* accSize */, eSize); 1203 1204 // Writeback microop: the post-increment amount is encoded in "Rm": a 1205 // 64-bit general register OR as '11111' for an immediate value equal to 1206 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1207 if (wb) { 1208 if (rm != ((RegIndex) INTREG_X31)) { 1209 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1210 UXTX, 0); 1211 } else { 1212 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1213 totNumBytes); 1214 } 1215 } 1216 1217 assert(uopIdx == numMicroops); 1218 1219 for (int i = 0; i < numMicroops - 1; i++) { 1220 microOps[i]->setDelayedCommit(); 1221 } 1222 microOps[numMicroops - 1]->setLastMicroop(); 1223} 1224 1225VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, 1226 OpClass __opClass, RegIndex rn, RegIndex vd, 1227 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1228 uint8_t numStructElems, uint8_t index, bool wb, 1229 bool replicate) : 1230 PredMacroOp(mnem, machInst, __opClass) 1231{ 1232 RegIndex vx = NumFloatV8ArchRegs / 4; 1233 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1234 bool baseIsSP = isSP((IntRegIndex) rnsp); 1235 1236 numMicroops = wb ? 1 : 0; 1237 1238 int eSizeBytes = 1 << eSize; 1239 int totNumBytes = numStructElems * eSizeBytes; 1240 assert(totNumBytes <= 64); 1241 1242 // The guiding principle here is that no more than 16 bytes can be 1243 // transferred at a time 1244 int numMemMicroops = totNumBytes / 16; 1245 int residuum = totNumBytes % 16; 1246 if (residuum) 1247 ++numMemMicroops; 1248 numMicroops += numMemMicroops; 1249 1250 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 
1 : 0); 1251 numMicroops += numMarshalMicroops; 1252 1253 microOps = new StaticInstPtr[numMicroops]; 1254 unsigned uopIdx = 0; 1255 1256 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1257 TLB::AllowUnaligned; 1258 1259 int i = 0; 1260 for (; i < numMemMicroops - 1; ++i) { 1261 microOps[uopIdx++] = new MicroNeonLoad64( 1262 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1263 baseIsSP, 16 /* accSize */, eSize); 1264 } 1265 microOps[uopIdx++] = new MicroNeonLoad64( 1266 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1267 residuum ? residuum : 16 /* accSize */, eSize); 1268 1269 // Writeback microop: the post-increment amount is encoded in "Rm": a 1270 // 64-bit general register OR as '11111' for an immediate value equal to 1271 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1272 if (wb) { 1273 if (rm != ((RegIndex) INTREG_X31)) { 1274 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1275 UXTX, 0); 1276 } else { 1277 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1278 totNumBytes); 1279 } 1280 } 1281 1282 for(int i = 0; i < numMarshalMicroops; ++i) { 1283 microOps[uopIdx++] = new MicroUnpackNeon64( 1284 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, 1285 numStructElems, index, i /* step */, replicate); 1286 } 1287 1288 assert(uopIdx == numMicroops); 1289 1290 for (int i = 0; i < numMicroops - 1; i++) { 1291 microOps[i]->setDelayedCommit(); 1292 } 1293 microOps[numMicroops - 1]->setLastMicroop(); 1294} 1295 1296VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, 1297 OpClass __opClass, RegIndex rn, RegIndex vd, 1298 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1299 uint8_t numStructElems, uint8_t index, bool wb, 1300 bool replicate) : 1301 PredMacroOp(mnem, machInst, __opClass) 1302{ 1303 RegIndex vx = NumFloatV8ArchRegs / 4; 1304 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1305 bool baseIsSP = isSP((IntRegIndex) rnsp); 
1306 1307 numMicroops = wb ? 1 : 0; 1308 1309 int eSizeBytes = 1 << eSize; 1310 int totNumBytes = numStructElems * eSizeBytes; 1311 assert(totNumBytes <= 64); 1312 1313 // The guiding principle here is that no more than 16 bytes can be 1314 // transferred at a time 1315 int numMemMicroops = totNumBytes / 16; 1316 int residuum = totNumBytes % 16; 1317 if (residuum) 1318 ++numMemMicroops; 1319 numMicroops += numMemMicroops; 1320 1321 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1322 numMicroops += numMarshalMicroops; 1323 1324 microOps = new StaticInstPtr[numMicroops]; 1325 unsigned uopIdx = 0; 1326 1327 for(int i = 0; i < numMarshalMicroops; ++i) { 1328 microOps[uopIdx++] = new MicroPackNeon64( 1329 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1330 numStructElems, index, i /* step */, replicate); 1331 } 1332 1333 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1334 TLB::AllowUnaligned; 1335 1336 int i = 0; 1337 for(; i < numMemMicroops - 1; ++i) { 1338 microOps[uopIdx++] = new MicroNeonStore64( 1339 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1340 baseIsSP, 16 /* accsize */, eSize); 1341 } 1342 microOps[uopIdx++] = new MicroNeonStore64( 1343 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1344 residuum ? residuum : 16 /* accSize */, eSize); 1345 1346 // Writeback microop: the post-increment amount is encoded in "Rm": a 1347 // 64-bit general register OR as '11111' for an immediate value equal to 1348 // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64) 1349 if (wb) { 1350 if (rm != ((RegIndex) INTREG_X31)) { 1351 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1352 UXTX, 0); 1353 } else { 1354 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1355 totNumBytes); 1356 } 1357 } 1358 1359 assert(uopIdx == numMicroops); 1360 1361 for (int i = 0; i < numMicroops - 1; i++) { 1362 microOps[i]->setDelayedCommit(); 1363 } 1364 microOps[numMicroops - 1]->setLastMicroop(); 1365} 1366 1367MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, 1368 OpClass __opClass, IntRegIndex rn, 1369 RegIndex vd, bool single, bool up, 1370 bool writeback, bool load, uint32_t offset) : 1371 PredMacroOp(mnem, machInst, __opClass) 1372{ 1373 int i = 0; 1374 1375 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem 1376 // to be functionally identical except that fldmx is deprecated. For now 1377 // we'll assume they're otherwise interchangable. 1378 int count = (single ? offset : (offset / 2)); 1379 if (count == 0 || count > NumFloatV7ArchRegs) 1380 warn_once("Bad offset field for VFP load/store multiple.\n"); 1381 if (count == 0) { 1382 // Force there to be at least one microop so the macroop makes sense. 1383 writeback = true; 1384 } 1385 if (count > NumFloatV7ArchRegs) 1386 count = NumFloatV7ArchRegs; 1387 1388 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); 1389 microOps = new StaticInstPtr[numMicroops]; 1390 1391 int64_t addr = 0; 1392 1393 if (!up) 1394 addr = 4 * offset; 1395 1396 bool tempUp = up; 1397 for (int j = 0; j < count; j++) { 1398 if (load) { 1399 if (single) { 1400 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, 1401 tempUp, addr); 1402 } else { 1403 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn, 1404 tempUp, addr); 1405 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp, 1406 addr + (up ? 
4 : -4)); 1407 } 1408 } else { 1409 if (single) { 1410 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, 1411 tempUp, addr); 1412 } else { 1413 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, 1414 tempUp, addr); 1415 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, 1416 addr + (up ? 4 : -4)); 1417 } 1418 } 1419 if (!tempUp) { 1420 addr -= (single ? 4 : 8); 1421 // The microops don't handle negative displacement, so turn if we 1422 // hit zero, flip polarity and start adding. 1423 if (addr <= 0) { 1424 tempUp = true; 1425 addr = -addr; 1426 } 1427 } else { 1428 addr += (single ? 4 : 8); 1429 } 1430 } 1431 1432 if (writeback) { 1433 if (up) { 1434 microOps[i++] = 1435 new MicroAddiUop(machInst, rn, rn, 4 * offset); 1436 } else { 1437 microOps[i++] = 1438 new MicroSubiUop(machInst, rn, rn, 4 * offset); 1439 } 1440 } 1441 1442 assert(numMicroops == i); 1443 microOps[numMicroops - 1]->setLastMicroop(); 1444 1445 for (StaticInstPtr *curUop = microOps; 1446 !(*curUop)->isLastMicroop(); curUop++) { 1447 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 1448 assert(uopPtr); 1449 uopPtr->setDelayedCommit(); 1450 } 1451} 1452 1453std::string 1454MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1455{ 1456 std::stringstream ss; 1457 printMnemonic(ss); 1458 printReg(ss, ura); 1459 ss << ", "; 1460 printReg(ss, urb); 1461 ss << ", "; 1462 ccprintf(ss, "#%d", imm); 1463 return ss.str(); 1464} 1465 1466std::string 1467MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1468{ 1469 std::stringstream ss; 1470 printMnemonic(ss); 1471 printReg(ss, ura); 1472 ss << ", "; 1473 printReg(ss, urb); 1474 ss << ", "; 1475 ccprintf(ss, "#%d", imm); 1476 return ss.str(); 1477} 1478 1479std::string 1480MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1481{ 1482 std::stringstream ss; 1483 printMnemonic(ss); 1484 ss << "[PC,CPSR]"; 1485 return ss.str(); 1486} 1487 1488std::string 
1489MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1490{ 1491 std::stringstream ss; 1492 printMnemonic(ss); 1493 printReg(ss, ura); 1494 ccprintf(ss, ", "); 1495 printReg(ss, urb); 1496 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1497 return ss.str(); 1498} 1499 1500std::string 1501MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1502{ 1503 std::stringstream ss; 1504 printMnemonic(ss); 1505 printReg(ss, ura); 1506 ss << ", "; 1507 printReg(ss, urb); 1508 return ss.str(); 1509} 1510 1511std::string 1512MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1513{ 1514 std::stringstream ss; 1515 printMnemonic(ss); 1516 printReg(ss, ura); 1517 ss << ", "; 1518 printReg(ss, urb); 1519 ss << ", "; 1520 printReg(ss, urc); 1521 return ss.str(); 1522} 1523 1524std::string 1525MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1526{ 1527 std::stringstream ss; 1528 printMnemonic(ss); 1529 if (isFloating()) 1530 printReg(ss, ura + FP_Reg_Base); 1531 else 1532 printReg(ss, ura); 1533 ss << ", ["; 1534 printReg(ss, urb); 1535 ss << ", "; 1536 ccprintf(ss, "#%d", imm); 1537 ss << "]"; 1538 return ss.str(); 1539} 1540 1541} 1542