174 for (StaticInstPtr *curUop = microOps; 175 !(*curUop)->isLastMicroop(); curUop++) { 176 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 177 assert(uopPtr); 178 uopPtr->setDelayedCommit(); 179 } 180} 181 182PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 183 uint32_t size, bool fp, bool load, bool noAlloc, 184 bool signExt, bool exclusive, bool acrel, 185 int64_t imm, AddrMode mode, 186 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : 187 PredMacroOp(mnem, machInst, __opClass) 188{ 189 bool writeback = (mode != AddrMd_Offset); 190 numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); 191 microOps = new StaticInstPtr[numMicroops]; 192 193 StaticInstPtr *uop = microOps; 194 195 bool post = (mode == AddrMd_PostIndex); 196 197 rn = makeSP(rn); 198 199 *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm); 200 201 if (fp) { 202 if (size == 16) { 203 if (load) { 204 *++uop = new MicroLdrQBFpXImmUop(machInst, rt, 205 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 206 *++uop = new MicroLdrQTFpXImmUop(machInst, rt, 207 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 208 *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, 209 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 210 *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, 211 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 212 } else { 213 *++uop = new MicroStrQBFpXImmUop(machInst, rt, 214 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 215 *++uop = new MicroStrQTFpXImmUop(machInst, rt, 216 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 217 *++uop = new MicroStrQBFpXImmUop(machInst, rt2, 218 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 219 *++uop = new MicroStrQTFpXImmUop(machInst, rt2, 220 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 221 } 222 } else if (size == 8) { 223 if (load) { 224 *++uop = new MicroLdrFpXImmUop(machInst, rt, 225 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 226 *++uop = new MicroLdrFpXImmUop(machInst, rt2, 227 INTREG_UREG0, 8, noAlloc, exclusive, acrel); 228 } else { 229 *++uop = new MicroStrFpXImmUop(machInst, rt, 230 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 231 *++uop = new MicroStrFpXImmUop(machInst, rt2, 232 INTREG_UREG0, 8, noAlloc, exclusive, acrel); 233 } 234 } else if (size == 4) { 235 if (load) { 236 *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, 237 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 238 } else { 239 *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, 240 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 241 } 242 } 243 } else { 244 if (size == 8) { 245 if (load) { 246 *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, 247 0, noAlloc, exclusive, acrel); 248 *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, 249 size, noAlloc, exclusive, acrel); 250 } else { 251 *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, 252 0, noAlloc, exclusive, acrel); 253 *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, 254 size, noAlloc, exclusive, acrel); 255 } 256 } else if (size == 4) { 257 if (load) { 258 if (signExt) { 259 *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, 260 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 261 } else { 262 *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, 263 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 264 } 265 } else { 266 *++uop = new MicroStrDXImmUop(machInst, rt, rt2, 267 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 268 } 269 } 270 } 271 272 if (writeback) { 273 *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, 274 post ? imm : 0); 275 } 276 277 (*uop)->setLastMicroop(); 278 279 for (StaticInstPtr *curUop = microOps; 280 !(*curUop)->isLastMicroop(); curUop++) { 281 (*curUop)->setDelayedCommit(); 282 } 283} 284 285BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, 286 OpClass __opClass, bool load, IntRegIndex dest, 287 IntRegIndex base, int64_t imm) : 288 PredMacroOp(mnem, machInst, __opClass) 289{ 290 numMicroops = 2; 291 microOps = new StaticInstPtr[numMicroops]; 292 293 if (load) { 294 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); 295 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); 296 } else { 297 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 298 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 299 } 300 microOps[0]->setDelayedCommit(); 301 microOps[1]->setLastMicroop(); 302} 303 304BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, 305 OpClass __opClass, bool load, IntRegIndex dest, 306 IntRegIndex base, int64_t imm) : 307 PredMacroOp(mnem, machInst, __opClass) 308{ 309 numMicroops = 3; 310 microOps = new StaticInstPtr[numMicroops]; 311 312 if (load) { 313 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); 314 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); 315 } else { 316 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); 317 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); 318 } 319 microOps[2] = new MicroAddXiUop(machInst, base, base, imm); 320 321 microOps[0]->setDelayedCommit(); 322 microOps[1]->setDelayedCommit(); 323 microOps[2]->setLastMicroop(); 324} 325 326BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, 327 OpClass __opClass, bool load, IntRegIndex dest, 328 IntRegIndex base, int64_t imm) : 329 PredMacroOp(mnem, machInst, __opClass) 330{ 331 numMicroops = 3; 332 microOps = new StaticInstPtr[numMicroops]; 333 334 if (load) { 335 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); 336 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); 337 } else { 338 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 339 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 340 } 341 microOps[2] = new MicroAddXiUop(machInst, base, base, imm); 342 343 microOps[0]->setDelayedCommit(); 344 microOps[1]->setDelayedCommit(); 345 microOps[2]->setLastMicroop(); 346} 347 348BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, 349 OpClass __opClass, bool load, IntRegIndex dest, 350 IntRegIndex base, IntRegIndex offset, 351 ArmExtendType type, int64_t imm) : 352 PredMacroOp(mnem, machInst, __opClass) 353{ 354 numMicroops = 2; 355 microOps = new StaticInstPtr[numMicroops]; 356 357 if (load) { 358 microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, 359 offset, type, imm); 360 microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, 361 offset, type, imm); 362 } else { 363 microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, 364 offset, type, imm); 365 microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, 366 offset, type, imm); 367 } 368 369 microOps[0]->setDelayedCommit(); 370 microOps[1]->setLastMicroop(); 371} 372 373BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, 374 OpClass __opClass, IntRegIndex dest, 375 int64_t imm) : 376 PredMacroOp(mnem, machInst, __opClass) 377{ 378 numMicroops = 2; 379 microOps = new StaticInstPtr[numMicroops]; 380 381 microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); 382 microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); 383 384 microOps[0]->setDelayedCommit(); 385 microOps[1]->setLastMicroop(); 386} 387 388VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 389 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 390 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 391 PredMacroOp(mnem, machInst, __opClass) 392{ 393 assert(regs > 0 && regs <= 4); 394 assert(regs % elems == 0); 395 396 numMicroops = (regs > 2) ? 2 : 1; 397 bool wb = (rm != 15); 398 bool deinterleave = (elems > 1); 399 400 if (wb) numMicroops++; 401 if (deinterleave) numMicroops += (regs / elems); 402 microOps = new StaticInstPtr[numMicroops]; 403 404 RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; 405 406 uint32_t noAlign = TLB::MustBeOne; 407 408 unsigned uopIdx = 0; 409 switch (regs) { 410 case 4: 411 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 412 size, machInst, rMid, rn, 0, align); 413 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 414 size, machInst, rMid + 4, rn, 16, noAlign); 415 break; 416 case 3: 417 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 418 size, machInst, rMid, rn, 0, align); 419 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 420 size, machInst, rMid + 4, rn, 16, noAlign); 421 break; 422 case 2: 423 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 424 size, machInst, rMid, rn, 0, align); 425 break; 426 case 1: 427 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 428 size, machInst, rMid, rn, 0, align); 429 break; 430 default: 431 // Unknown number of registers 432 microOps[uopIdx++] = new Unknown(machInst); 433 } 434 if (wb) { 435 if (rm != 15 && rm != 13) { 436 microOps[uopIdx++] = 437 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 438 } else { 439 microOps[uopIdx++] = 440 new MicroAddiUop(machInst, rn, rn, regs * 8); 441 } 442 } 443 if (deinterleave) { 444 switch (elems) { 445 case 4: 446 assert(regs == 4); 447 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>( 448 size, machInst, vd * 2, rMid, inc * 2); 449 break; 450 case 3: 451 assert(regs == 3); 452 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>( 453 size, machInst, vd * 2, rMid, inc * 2); 454 break; 455 case 2: 456 assert(regs == 4 || regs == 2); 457 if (regs == 4) { 458 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 459 size, machInst, vd * 2, rMid, inc * 2); 460 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 461 size, machInst, vd * 2 + 2, rMid + 4, inc * 2); 462 } else { 463 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 464 size, machInst, vd * 2, rMid, inc * 2); 465 } 466 break; 467 default: 468 // Bad number of elements to deinterleave 469 microOps[uopIdx++] = new Unknown(machInst); 470 } 471 } 472 assert(uopIdx == numMicroops); 473 474 for (unsigned i = 0; i < numMicroops - 1; i++) { 475 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 476 assert(uopPtr); 477 uopPtr->setDelayedCommit(); 478 } 479 microOps[numMicroops - 1]->setLastMicroop(); 480} 481 482VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, 483 OpClass __opClass, bool all, unsigned elems, 484 RegIndex rn, RegIndex vd, unsigned regs, 485 unsigned inc, uint32_t size, uint32_t align, 486 RegIndex rm, unsigned lane) : 487 PredMacroOp(mnem, machInst, __opClass) 488{ 489 assert(regs > 0 && regs <= 4); 490 assert(regs % elems == 0); 491 492 unsigned eBytes = (1 << size); 493 unsigned loadSize = eBytes * elems; 494 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) / 495 sizeof(FloatRegBits); 496 497 assert(loadRegs > 0 && loadRegs <= 4); 498 499 numMicroops = 1; 500 bool wb = (rm != 15); 501 502 if (wb) numMicroops++; 503 numMicroops += (regs / elems); 504 microOps = new StaticInstPtr[numMicroops]; 505 506 RegIndex ufp0 = NumFloatV7ArchRegs; 507 508 unsigned uopIdx = 0; 509 switch (loadSize) { 510 case 1: 511 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>( 512 machInst, ufp0, rn, 0, align); 513 break; 514 case 2: 515 if (eBytes == 2) { 516 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>( 517 machInst, ufp0, rn, 0, align); 518 } else { 519 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>( 520 machInst, ufp0, rn, 0, align); 521 } 522 break; 523 case 3: 524 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>( 525 machInst, ufp0, rn, 0, align); 526 break; 527 case 4: 528 switch (eBytes) { 529 case 1: 530 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>( 531 machInst, ufp0, rn, 0, align); 532 break; 533 case 2: 534 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>( 535 machInst, ufp0, rn, 0, align); 536 break; 537 case 4: 538 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>( 539 machInst, ufp0, rn, 0, align); 540 break; 541 } 542 break; 543 case 6: 544 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>( 545 machInst, ufp0, rn, 0, align); 546 break; 547 case 8: 548 switch (eBytes) { 549 case 2: 550 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>( 551 machInst, ufp0, rn, 0, align); 552 break; 553 case 4: 554 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>( 555 machInst, ufp0, rn, 0, align); 556 break; 557 } 558 break; 559 case 12: 560 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>( 561 machInst, ufp0, rn, 0, align); 562 break; 563 case 16: 564 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>( 565 machInst, ufp0, rn, 0, align); 566 break; 567 default: 568 // Unrecognized load size 569 microOps[uopIdx++] = new Unknown(machInst); 570 } 571 if (wb) { 572 if (rm != 15 && rm != 13) { 573 microOps[uopIdx++] = 574 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 575 } else { 576 microOps[uopIdx++] = 577 new MicroAddiUop(machInst, rn, rn, loadSize); 578 } 579 } 580 switch (elems) { 581 case 4: 582 assert(regs == 4); 583 switch (size) { 584 case 0: 585 if (all) { 586 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>( 587 machInst, vd * 2, ufp0, inc * 2); 588 } else { 589 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>( 590 machInst, vd * 2, ufp0, inc * 2, lane); 591 } 592 break; 593 case 1: 594 if (all) { 595 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>( 596 machInst, vd * 2, ufp0, inc * 2); 597 } else { 598 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>( 599 machInst, vd * 2, ufp0, inc * 2, lane); 600 } 601 break; 602 case 2: 603 if (all) { 604 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>( 605 machInst, vd * 2, ufp0, inc * 2); 606 } else { 607 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>( 608 machInst, vd * 2, ufp0, inc * 2, lane); 609 } 610 break; 611 default: 612 // Bad size 613 microOps[uopIdx++] = new Unknown(machInst); 614 break; 615 } 616 break; 617 case 3: 618 assert(regs == 3); 619 switch (size) { 620 case 0: 621 if (all) { 622 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>( 623 machInst, vd * 2, ufp0, inc * 2); 624 } else { 625 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>( 626 machInst, vd * 2, ufp0, inc * 2, lane); 627 } 628 break; 629 case 1: 630 if (all) { 631 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>( 632 machInst, vd * 2, ufp0, inc * 2); 633 } else { 634 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>( 635 machInst, vd * 2, ufp0, inc * 2, lane); 636 } 637 break; 638 case 2: 639 if (all) { 640 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>( 641 machInst, vd * 2, ufp0, inc * 2); 642 } else { 643 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>( 644 machInst, vd * 2, ufp0, inc * 2, lane); 645 } 646 break; 647 default: 648 // Bad size 649 microOps[uopIdx++] = new Unknown(machInst); 650 break; 651 } 652 break; 653 case 2: 654 assert(regs == 2); 655 assert(loadRegs <= 2); 656 switch (size) { 657 case 0: 658 if (all) { 659 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>( 660 machInst, vd * 2, ufp0, inc * 2); 661 } else { 662 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>( 663 machInst, vd * 2, ufp0, inc * 2, lane); 664 } 665 break; 666 case 1: 667 if (all) { 668 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>( 669 machInst, vd * 2, ufp0, inc * 2); 670 } else { 671 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>( 672 machInst, vd * 2, ufp0, inc * 2, lane); 673 } 674 break; 675 case 2: 676 if (all) { 677 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>( 678 machInst, vd * 2, ufp0, inc * 2); 679 } else { 680 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>( 681 machInst, vd * 2, ufp0, inc * 2, lane); 682 } 683 break; 684 default: 685 // Bad size 686 microOps[uopIdx++] = new Unknown(machInst); 687 break; 688 } 689 break; 690 case 1: 691 assert(regs == 1 || (all && regs == 2)); 692 assert(loadRegs <= 2); 693 for (unsigned offset = 0; offset < regs; offset++) { 694 switch (size) { 695 case 0: 696 if (all) { 697 microOps[uopIdx++] = 698 new MicroUnpackAllNeon2to2Uop<uint8_t>( 699 machInst, (vd + offset) * 2, ufp0, inc * 2); 700 } else { 701 microOps[uopIdx++] = 702 new MicroUnpackNeon2to2Uop<uint8_t>( 703 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 704 } 705 break; 706 case 1: 707 if (all) { 708 microOps[uopIdx++] = 709 new MicroUnpackAllNeon2to2Uop<uint16_t>( 710 machInst, (vd + offset) * 2, ufp0, inc * 2); 711 } else { 712 microOps[uopIdx++] = 713 new MicroUnpackNeon2to2Uop<uint16_t>( 714 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 715 } 716 break; 717 case 2: 718 if (all) { 719 microOps[uopIdx++] = 720 new MicroUnpackAllNeon2to2Uop<uint32_t>( 721 machInst, (vd + offset) * 2, ufp0, inc * 2); 722 } else { 723 microOps[uopIdx++] = 724 new MicroUnpackNeon2to2Uop<uint32_t>( 725 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 726 } 727 break; 728 default: 729 // Bad size 730 microOps[uopIdx++] = new Unknown(machInst); 731 break; 732 } 733 } 734 break; 735 default: 736 // Bad number of elements to unpack 737 microOps[uopIdx++] = new Unknown(machInst); 738 } 739 assert(uopIdx == numMicroops); 740 741 for (unsigned i = 0; i < numMicroops - 1; i++) { 742 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 743 assert(uopPtr); 744 uopPtr->setDelayedCommit(); 745 } 746 microOps[numMicroops - 1]->setLastMicroop(); 747} 748 749VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 750 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 751 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 752 PredMacroOp(mnem, machInst, __opClass) 753{ 754 assert(regs > 0 && regs <= 4); 755 assert(regs % elems == 0); 756 757 numMicroops = (regs > 2) ? 2 : 1; 758 bool wb = (rm != 15); 759 bool interleave = (elems > 1); 760 761 if (wb) numMicroops++; 762 if (interleave) numMicroops += (regs / elems); 763 microOps = new StaticInstPtr[numMicroops]; 764 765 uint32_t noAlign = TLB::MustBeOne; 766 767 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; 768 769 unsigned uopIdx = 0; 770 if (interleave) { 771 switch (elems) { 772 case 4: 773 assert(regs == 4); 774 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>( 775 size, machInst, rMid, vd * 2, inc * 2); 776 break; 777 case 3: 778 assert(regs == 3); 779 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>( 780 size, machInst, rMid, vd * 2, inc * 2); 781 break; 782 case 2: 783 assert(regs == 4 || regs == 2); 784 if (regs == 4) { 785 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 786 size, machInst, rMid, vd * 2, inc * 2); 787 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 788 size, machInst, rMid + 4, vd * 2 + 2, inc * 2); 789 } else { 790 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 791 size, machInst, rMid, vd * 2, inc * 2); 792 } 793 break; 794 default: 795 // Bad number of elements to interleave 796 microOps[uopIdx++] = new Unknown(machInst); 797 } 798 } 799 switch (regs) { 800 case 4: 801 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 802 size, machInst, rMid, rn, 0, align); 803 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 804 size, machInst, rMid + 4, rn, 16, noAlign); 805 break; 806 case 3: 807 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 808 size, machInst, rMid, rn, 0, align); 809 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 810 size, machInst, rMid + 4, rn, 16, noAlign); 811 break; 812 case 2: 813 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 814 size, machInst, rMid, rn, 0, align); 815 break; 816 case 1: 817 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 818 size, machInst, rMid, rn, 0, align); 819 break; 820 default: 821 // Unknown number of registers 822 microOps[uopIdx++] = new Unknown(machInst); 823 } 824 if (wb) { 825 if (rm != 15 && rm != 13) { 826 microOps[uopIdx++] = 827 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 828 } else { 829 microOps[uopIdx++] = 830 new MicroAddiUop(machInst, rn, rn, regs * 8); 831 } 832 } 833 assert(uopIdx == numMicroops); 834 835 for (unsigned i = 0; i < numMicroops - 1; i++) { 836 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 837 assert(uopPtr); 838 uopPtr->setDelayedCommit(); 839 } 840 microOps[numMicroops - 1]->setLastMicroop(); 841} 842 843VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, 844 OpClass __opClass, bool all, unsigned elems, 845 RegIndex rn, RegIndex vd, unsigned regs, 846 unsigned inc, uint32_t size, uint32_t align, 847 RegIndex rm, unsigned lane) : 848 PredMacroOp(mnem, machInst, __opClass) 849{ 850 assert(!all); 851 assert(regs > 0 && regs <= 4); 852 assert(regs % elems == 0); 853 854 unsigned eBytes = (1 << size); 855 unsigned storeSize = eBytes * elems; 856 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) / 857 sizeof(FloatRegBits); 858 859 assert(storeRegs > 0 && storeRegs <= 4); 860 861 numMicroops = 1; 862 bool wb = (rm != 15); 863 864 if (wb) numMicroops++; 865 numMicroops += (regs / elems); 866 microOps = new StaticInstPtr[numMicroops]; 867 868 RegIndex ufp0 = NumFloatV7ArchRegs; 869 870 unsigned uopIdx = 0; 871 switch (elems) { 872 case 4: 873 assert(regs == 4); 874 switch (size) { 875 case 0: 876 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>( 877 machInst, ufp0, vd * 2, inc * 2, lane); 878 break; 879 case 1: 880 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>( 881 machInst, ufp0, vd * 2, inc * 2, lane); 882 break; 883 case 2: 884 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>( 885 machInst, ufp0, vd * 2, inc * 2, lane); 886 break; 887 default: 888 // Bad size 889 microOps[uopIdx++] = new Unknown(machInst); 890 break; 891 } 892 break; 893 case 3: 894 assert(regs == 3); 895 switch (size) { 896 case 0: 897 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>( 898 machInst, ufp0, vd * 2, inc * 2, lane); 899 break; 900 case 1: 901 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>( 902 machInst, ufp0, vd * 2, inc * 2, lane); 903 break; 904 case 2: 905 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>( 906 machInst, ufp0, vd * 2, inc * 2, lane); 907 break; 908 default: 909 // Bad size 910 microOps[uopIdx++] = new Unknown(machInst); 911 break; 912 } 913 break; 914 case 2: 915 assert(regs == 2); 916 assert(storeRegs <= 2); 917 switch (size) { 918 case 0: 919 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>( 920 machInst, ufp0, vd * 2, inc * 2, lane); 921 break; 922 case 1: 923 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>( 924 machInst, ufp0, vd * 2, inc * 2, lane); 925 break; 926 case 2: 927 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>( 928 machInst, ufp0, vd * 2, inc * 2, lane); 929 break; 930 default: 931 // Bad size 932 microOps[uopIdx++] = new Unknown(machInst); 933 break; 934 } 935 break; 936 case 1: 937 assert(regs == 1 || (all && regs == 2)); 938 assert(storeRegs <= 2); 939 for (unsigned offset = 0; offset < regs; offset++) { 940 switch (size) { 941 case 0: 942 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>( 943 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 944 break; 945 case 1: 946 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>( 947 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 948 break; 949 case 2: 950 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>( 951 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 952 break; 953 default: 954 // Bad size 955 microOps[uopIdx++] = new Unknown(machInst); 956 break; 957 } 958 } 959 break; 960 default: 961 // Bad number of elements to unpack 962 microOps[uopIdx++] = new Unknown(machInst); 963 } 964 switch (storeSize) { 965 case 1: 966 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>( 967 machInst, ufp0, rn, 0, align); 968 break; 969 case 2: 970 if (eBytes == 2) { 971 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>( 972 machInst, ufp0, rn, 0, align); 973 } else { 974 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>( 975 machInst, ufp0, rn, 0, align); 976 } 977 break; 978 case 3: 979 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>( 980 machInst, ufp0, rn, 0, align); 981 break; 982 case 4: 983 switch (eBytes) { 984 case 1: 985 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>( 986 machInst, ufp0, rn, 0, align); 987 break; 988 case 2: 989 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>( 990 machInst, ufp0, rn, 0, align); 991 break; 992 case 4: 993 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>( 994 machInst, ufp0, rn, 0, align); 995 break; 996 } 997 break; 998 case 6: 999 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>( 1000 machInst, ufp0, rn, 0, align); 1001 break; 1002 case 8: 1003 switch (eBytes) { 1004 case 2: 1005 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>( 1006 machInst, ufp0, rn, 0, align); 1007 break; 1008 case 4: 1009 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>( 1010 machInst, ufp0, rn, 0, align); 1011 break; 1012 } 1013 break; 1014 case 12: 1015 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>( 1016 machInst, ufp0, rn, 0, align); 1017 break; 1018 case 16: 1019 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>( 1020 machInst, ufp0, rn, 0, align); 1021 break; 1022 default: 1023 // Bad store size 1024 microOps[uopIdx++] = new Unknown(machInst); 1025 } 1026 if (wb) { 1027 if (rm != 15 && rm != 13) { 1028 microOps[uopIdx++] = 1029 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 1030 } else { 1031 microOps[uopIdx++] = 1032 new MicroAddiUop(machInst, rn, rn, storeSize); 1033 } 1034 } 1035 assert(uopIdx == numMicroops); 1036 1037 for (unsigned i = 0; i < numMicroops - 1; i++) { 1038 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 1039 assert(uopPtr); 1040 uopPtr->setDelayedCommit(); 1041 } 1042 microOps[numMicroops - 1]->setLastMicroop(); 1043} 1044 1045VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, 1046 OpClass __opClass, RegIndex rn, RegIndex vd, 1047 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1048 uint8_t numStructElems, uint8_t numRegs, bool wb) : 1049 PredMacroOp(mnem, machInst, __opClass) 1050{ 1051 RegIndex vx = NumFloatV8ArchRegs / 4; 1052 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1053 bool baseIsSP = isSP((IntRegIndex) rnsp); 1054 1055 numMicroops = wb ? 1 : 0; 1056 1057 int totNumBytes = numRegs * dataSize / 8; 1058 assert(totNumBytes <= 64); 1059 1060 // The guiding principle here is that no more than 16 bytes can be 1061 // transferred at a time 1062 int numMemMicroops = totNumBytes / 16; 1063 int residuum = totNumBytes % 16; 1064 if (residuum) 1065 ++numMemMicroops; 1066 numMicroops += numMemMicroops; 1067 1068 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); 1069 numMicroops += numMarshalMicroops; 1070 1071 microOps = new StaticInstPtr[numMicroops]; 1072 unsigned uopIdx = 0; 1073 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1074 TLB::AllowUnaligned; 1075 1076 int i = 0; 1077 for(; i < numMemMicroops - 1; ++i) { 1078 microOps[uopIdx++] = new MicroNeonLoad64( 1079 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1080 baseIsSP, 16 /* accSize */, eSize); 1081 } 1082 microOps[uopIdx++] = new MicroNeonLoad64( 1083 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1084 residuum ? residuum : 16 /* accSize */, eSize); 1085 1086 // Writeback microop: the post-increment amount is encoded in "Rm": a 1087 // 64-bit general register OR as '11111' for an immediate value equal to 1088 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1089 if (wb) { 1090 if (rm != ((RegIndex) INTREG_X31)) { 1091 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1092 UXTX, 0); 1093 } else { 1094 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1095 totNumBytes); 1096 } 1097 } 1098 1099 for (int i = 0; i < numMarshalMicroops; ++i) { 1100 microOps[uopIdx++] = new MicroDeintNeon64( 1101 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, 1102 numStructElems, numRegs, i /* step */); 1103 } 1104 1105 assert(uopIdx == numMicroops); 1106 1107 for (int i = 0; i < numMicroops - 1; ++i) { 1108 microOps[i]->setDelayedCommit(); 1109 } 1110 microOps[numMicroops - 1]->setLastMicroop(); 1111} 1112 1113VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, 1114 OpClass __opClass, RegIndex rn, RegIndex vd, 1115 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1116 uint8_t numStructElems, uint8_t numRegs, bool wb) : 1117 PredMacroOp(mnem, machInst, __opClass) 1118{ 1119 RegIndex vx = NumFloatV8ArchRegs / 4; 1120 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1121 bool baseIsSP = isSP((IntRegIndex) rnsp); 1122 1123 numMicroops = wb ? 1 : 0; 1124 1125 int totNumBytes = numRegs * dataSize / 8; 1126 assert(totNumBytes <= 64); 1127 1128 // The guiding principle here is that no more than 16 bytes can be 1129 // transferred at a time 1130 int numMemMicroops = totNumBytes / 16; 1131 int residuum = totNumBytes % 16; 1132 if (residuum) 1133 ++numMemMicroops; 1134 numMicroops += numMemMicroops; 1135 1136 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1137 numMicroops += numMarshalMicroops; 1138 1139 microOps = new StaticInstPtr[numMicroops]; 1140 unsigned uopIdx = 0; 1141 1142 for(int i = 0; i < numMarshalMicroops; ++i) { 1143 microOps[uopIdx++] = new MicroIntNeon64( 1144 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1145 numStructElems, numRegs, i /* step */); 1146 } 1147 1148 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1149 TLB::AllowUnaligned; 1150 1151 int i = 0; 1152 for(; i < numMemMicroops - 1; ++i) { 1153 microOps[uopIdx++] = new MicroNeonStore64( 1154 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1155 baseIsSP, 16 /* accSize */, eSize); 1156 } 1157 microOps[uopIdx++] = new MicroNeonStore64( 1158 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1159 residuum ? residuum : 16 /* accSize */, eSize); 1160 1161 // Writeback microop: the post-increment amount is encoded in "Rm": a 1162 // 64-bit general register OR as '11111' for an immediate value equal to 1163 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1164 if (wb) { 1165 if (rm != ((RegIndex) INTREG_X31)) { 1166 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1167 UXTX, 0); 1168 } else { 1169 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1170 totNumBytes); 1171 } 1172 } 1173 1174 assert(uopIdx == numMicroops); 1175 1176 for (int i = 0; i < numMicroops - 1; i++) { 1177 microOps[i]->setDelayedCommit(); 1178 } 1179 microOps[numMicroops - 1]->setLastMicroop(); 1180} 1181 1182VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, 1183 OpClass __opClass, RegIndex rn, RegIndex vd, 1184 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1185 uint8_t numStructElems, uint8_t index, bool wb, 1186 bool replicate) : 1187 PredMacroOp(mnem, machInst, __opClass) 1188{ 1189 RegIndex vx = NumFloatV8ArchRegs / 4; 1190 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1191 bool baseIsSP = isSP((IntRegIndex) rnsp); 1192 1193 numMicroops = wb ? 1 : 0; 1194 1195 int eSizeBytes = 1 << eSize; 1196 int totNumBytes = numStructElems * eSizeBytes; 1197 assert(totNumBytes <= 64); 1198 1199 // The guiding principle here is that no more than 16 bytes can be 1200 // transferred at a time 1201 int numMemMicroops = totNumBytes / 16; 1202 int residuum = totNumBytes % 16; 1203 if (residuum) 1204 ++numMemMicroops; 1205 numMicroops += numMemMicroops; 1206 1207 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); 1208 numMicroops += numMarshalMicroops; 1209 1210 microOps = new StaticInstPtr[numMicroops]; 1211 unsigned uopIdx = 0; 1212 1213 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1214 TLB::AllowUnaligned; 1215 1216 int i = 0; 1217 for (; i < numMemMicroops - 1; ++i) { 1218 microOps[uopIdx++] = new MicroNeonLoad64( 1219 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1220 baseIsSP, 16 /* accSize */, eSize); 1221 } 1222 microOps[uopIdx++] = new MicroNeonLoad64( 1223 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1224 residuum ? residuum : 16 /* accSize */, eSize); 1225 1226 // Writeback microop: the post-increment amount is encoded in "Rm": a 1227 // 64-bit general register OR as '11111' for an immediate value equal to 1228 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1229 if (wb) { 1230 if (rm != ((RegIndex) INTREG_X31)) { 1231 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1232 UXTX, 0); 1233 } else { 1234 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1235 totNumBytes); 1236 } 1237 } 1238 1239 for(int i = 0; i < numMarshalMicroops; ++i) { 1240 microOps[uopIdx++] = new MicroUnpackNeon64( 1241 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, 1242 numStructElems, index, i /* step */, replicate); 1243 } 1244 1245 assert(uopIdx == numMicroops); 1246 1247 for (int i = 0; i < numMicroops - 1; i++) { 1248 microOps[i]->setDelayedCommit(); 1249 } 1250 microOps[numMicroops - 1]->setLastMicroop(); 1251} 1252 1253VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, 1254 OpClass __opClass, RegIndex rn, RegIndex vd, 1255 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1256 uint8_t numStructElems, uint8_t index, bool wb, 1257 bool replicate) : 1258 PredMacroOp(mnem, machInst, __opClass) 1259{ 1260 RegIndex vx = NumFloatV8ArchRegs / 4; 1261 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1262 bool baseIsSP = isSP((IntRegIndex) rnsp); 1263 1264 numMicroops = wb ? 1 : 0; 1265 1266 int eSizeBytes = 1 << eSize; 1267 int totNumBytes = numStructElems * eSizeBytes; 1268 assert(totNumBytes <= 64); 1269 1270 // The guiding principle here is that no more than 16 bytes can be 1271 // transferred at a time 1272 int numMemMicroops = totNumBytes / 16; 1273 int residuum = totNumBytes % 16; 1274 if (residuum) 1275 ++numMemMicroops; 1276 numMicroops += numMemMicroops; 1277 1278 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1279 numMicroops += numMarshalMicroops; 1280 1281 microOps = new StaticInstPtr[numMicroops]; 1282 unsigned uopIdx = 0; 1283 1284 for(int i = 0; i < numMarshalMicroops; ++i) { 1285 microOps[uopIdx++] = new MicroPackNeon64( 1286 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1287 numStructElems, index, i /* step */, replicate); 1288 } 1289 1290 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1291 TLB::AllowUnaligned; 1292 1293 int i = 0; 1294 for(; i < numMemMicroops - 1; ++i) { 1295 microOps[uopIdx++] = new MicroNeonStore64( 1296 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1297 baseIsSP, 16 /* accsize */, eSize); 1298 } 1299 microOps[uopIdx++] = new MicroNeonStore64( 1300 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1301 residuum ? residuum : 16 /* accSize */, eSize); 1302 1303 // Writeback microop: the post-increment amount is encoded in "Rm": a 1304 // 64-bit general register OR as '11111' for an immediate value equal to 1305 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1306 if (wb) { 1307 if (rm != ((RegIndex) INTREG_X31)) { 1308 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1309 UXTX, 0); 1310 } else { 1311 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1312 totNumBytes); 1313 } 1314 } 1315 1316 assert(uopIdx == numMicroops); 1317 1318 for (int i = 0; i < numMicroops - 1; i++) { 1319 microOps[i]->setDelayedCommit(); 1320 } 1321 microOps[numMicroops - 1]->setLastMicroop(); 1322} 1323 1324MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, 1325 OpClass __opClass, IntRegIndex rn, 1326 RegIndex vd, bool single, bool up, 1327 bool writeback, bool load, uint32_t offset) : 1328 PredMacroOp(mnem, machInst, __opClass) 1329{ 1330 int i = 0; 1331 1332 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem 1333 // to be functionally identical except that fldmx is deprecated. For now 1334 // we'll assume they're otherwise interchangable. 1335 int count = (single ? offset : (offset / 2)); 1336 if (count == 0 || count > NumFloatV7ArchRegs) 1337 warn_once("Bad offset field for VFP load/store multiple.\n"); 1338 if (count == 0) { 1339 // Force there to be at least one microop so the macroop makes sense. 1340 writeback = true; 1341 } 1342 if (count > NumFloatV7ArchRegs) 1343 count = NumFloatV7ArchRegs; 1344 1345 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); 1346 microOps = new StaticInstPtr[numMicroops]; 1347 1348 int64_t addr = 0; 1349 1350 if (!up) 1351 addr = 4 * offset; 1352 1353 bool tempUp = up; 1354 for (int j = 0; j < count; j++) { 1355 if (load) { 1356 if (single) { 1357 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, 1358 tempUp, addr); 1359 } else { 1360 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn, 1361 tempUp, addr); 1362 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp, 1363 addr + (up ? 4 : -4)); 1364 } 1365 } else { 1366 if (single) { 1367 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, 1368 tempUp, addr); 1369 } else { 1370 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, 1371 tempUp, addr); 1372 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, 1373 addr + (up ? 4 : -4)); 1374 } 1375 } 1376 if (!tempUp) { 1377 addr -= (single ? 4 : 8); 1378 // The microops don't handle negative displacement, so turn if we 1379 // hit zero, flip polarity and start adding. 1380 if (addr <= 0) { 1381 tempUp = true; 1382 addr = -addr; 1383 } 1384 } else { 1385 addr += (single ? 4 : 8); 1386 } 1387 } 1388 1389 if (writeback) { 1390 if (up) { 1391 microOps[i++] = 1392 new MicroAddiUop(machInst, rn, rn, 4 * offset); 1393 } else { 1394 microOps[i++] = 1395 new MicroSubiUop(machInst, rn, rn, 4 * offset); 1396 } 1397 } 1398 1399 assert(numMicroops == i); 1400 microOps[numMicroops - 1]->setLastMicroop(); 1401 1402 for (StaticInstPtr *curUop = microOps; 1403 !(*curUop)->isLastMicroop(); curUop++) { 1404 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 1405 assert(uopPtr); 1406 uopPtr->setDelayedCommit(); 1407 } 1408} 1409 1410std::string 1411MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1412{ 1413 std::stringstream ss; 1414 printMnemonic(ss); 1415 printReg(ss, ura); 1416 ss << ", "; 1417 printReg(ss, urb); 1418 ss << ", "; 1419 ccprintf(ss, "#%d", imm); 1420 return ss.str(); 1421} 1422 1423std::string 1424MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1425{ 1426 std::stringstream ss; 1427 printMnemonic(ss); 1428 printReg(ss, ura); 1429 ss << ", "; 1430 printReg(ss, urb); 1431 ss << ", "; 1432 ccprintf(ss, "#%d", imm); 1433 return ss.str(); 1434} 1435 1436std::string 1437MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1438{ 1439 std::stringstream ss; 1440 printMnemonic(ss); 1441 ss << "[PC,CPSR]"; 1442 return ss.str(); 1443} 1444 1445std::string 1446MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1447{ 1448 std::stringstream ss; 1449 printMnemonic(ss); 1450 printReg(ss, ura); 1451 ccprintf(ss, ", "); 1452 printReg(ss, urb); 1453 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1454 return ss.str(); 1455} 1456 1457std::string 1458MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1459{ 1460 std::stringstream ss; 1461 printMnemonic(ss); 1462 printReg(ss, ura); 1463 ss << ", "; 1464 printReg(ss, urb); 1465 return ss.str(); 1466} 1467 1468std::string 1469MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1470{ 1471 std::stringstream ss; 1472 printMnemonic(ss); 1473 printReg(ss, ura); 1474 ss << ", "; 1475 printReg(ss, urb); 1476 ss << ", "; 1477 printReg(ss, urc); 1478 return ss.str(); 1479} 1480 1481std::string 1482MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1483{ 1484 std::stringstream ss; 1485 printMnemonic(ss); 1486 if (isFloating()) 1487 printReg(ss, ura + FP_Reg_Base); 1488 else 1489 printReg(ss, ura); 1490 ss << ", ["; 1491 printReg(ss, urb); 1492 ss << ", "; 1493 ccprintf(ss, "#%d", imm); 1494 ss << "]"; 1495 return ss.str(); 1496} 1497 1498}
| 184 for (StaticInstPtr *curUop = microOps; 185 !(*curUop)->isLastMicroop(); curUop++) { 186 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 187 assert(uopPtr); 188 uopPtr->setDelayedCommit(); 189 } 190} 191 192PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 193 uint32_t size, bool fp, bool load, bool noAlloc, 194 bool signExt, bool exclusive, bool acrel, 195 int64_t imm, AddrMode mode, 196 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : 197 PredMacroOp(mnem, machInst, __opClass) 198{ 199 bool writeback = (mode != AddrMd_Offset); 200 numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); 201 microOps = new StaticInstPtr[numMicroops]; 202 203 StaticInstPtr *uop = microOps; 204 205 bool post = (mode == AddrMd_PostIndex); 206 207 rn = makeSP(rn); 208 209 *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm); 210 211 if (fp) { 212 if (size == 16) { 213 if (load) { 214 *++uop = new MicroLdrQBFpXImmUop(machInst, rt, 215 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 216 *++uop = new MicroLdrQTFpXImmUop(machInst, rt, 217 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 218 *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, 219 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 220 *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, 221 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 222 } else { 223 *++uop = new MicroStrQBFpXImmUop(machInst, rt, 224 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 225 *++uop = new MicroStrQTFpXImmUop(machInst, rt, 226 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 227 *++uop = new MicroStrQBFpXImmUop(machInst, rt2, 228 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 229 *++uop = new MicroStrQTFpXImmUop(machInst, rt2, 230 INTREG_UREG0, 16, noAlloc, exclusive, acrel); 231 } 232 } else if (size == 8) { 233 if (load) { 234 *++uop = new MicroLdrFpXImmUop(machInst, rt, 235 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 236 *++uop = new MicroLdrFpXImmUop(machInst, rt2, 237 INTREG_UREG0, 8, noAlloc, exclusive, acrel); 238 } else { 239 *++uop = new MicroStrFpXImmUop(machInst, rt, 240 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 241 *++uop = new MicroStrFpXImmUop(machInst, rt2, 242 INTREG_UREG0, 8, noAlloc, exclusive, acrel); 243 } 244 } else if (size == 4) { 245 if (load) { 246 *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, 247 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 248 } else { 249 *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, 250 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 251 } 252 } 253 } else { 254 if (size == 8) { 255 if (load) { 256 *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, 257 0, noAlloc, exclusive, acrel); 258 *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, 259 size, noAlloc, exclusive, acrel); 260 } else { 261 *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, 262 0, noAlloc, exclusive, acrel); 263 *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, 264 size, noAlloc, exclusive, acrel); 265 } 266 } else if (size == 4) { 267 if (load) { 268 if (signExt) { 269 *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, 270 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 271 } else { 272 *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, 273 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 274 } 275 } else { 276 *++uop = new MicroStrDXImmUop(machInst, rt, rt2, 277 INTREG_UREG0, 0, noAlloc, exclusive, acrel); 278 } 279 } 280 } 281 282 if (writeback) { 283 *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, 284 post ? imm : 0); 285 } 286 287 (*uop)->setLastMicroop(); 288 289 for (StaticInstPtr *curUop = microOps; 290 !(*curUop)->isLastMicroop(); curUop++) { 291 (*curUop)->setDelayedCommit(); 292 } 293} 294 295BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, 296 OpClass __opClass, bool load, IntRegIndex dest, 297 IntRegIndex base, int64_t imm) : 298 PredMacroOp(mnem, machInst, __opClass) 299{ 300 numMicroops = 2; 301 microOps = new StaticInstPtr[numMicroops]; 302 303 if (load) { 304 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); 305 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); 306 } else { 307 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 308 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 309 } 310 microOps[0]->setDelayedCommit(); 311 microOps[1]->setLastMicroop(); 312} 313 314BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, 315 OpClass __opClass, bool load, IntRegIndex dest, 316 IntRegIndex base, int64_t imm) : 317 PredMacroOp(mnem, machInst, __opClass) 318{ 319 numMicroops = 3; 320 microOps = new StaticInstPtr[numMicroops]; 321 322 if (load) { 323 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); 324 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); 325 } else { 326 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); 327 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); 328 } 329 microOps[2] = new MicroAddXiUop(machInst, base, base, imm); 330 331 microOps[0]->setDelayedCommit(); 332 microOps[1]->setDelayedCommit(); 333 microOps[2]->setLastMicroop(); 334} 335 336BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, 337 OpClass __opClass, bool load, IntRegIndex dest, 338 IntRegIndex base, int64_t imm) : 339 PredMacroOp(mnem, machInst, __opClass) 340{ 341 numMicroops = 3; 342 microOps = new StaticInstPtr[numMicroops]; 343 344 if (load) { 345 microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); 346 microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); 347 } else { 348 microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); 349 microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); 350 } 351 microOps[2] = new MicroAddXiUop(machInst, base, base, imm); 352 353 microOps[0]->setDelayedCommit(); 354 microOps[1]->setDelayedCommit(); 355 microOps[2]->setLastMicroop(); 356} 357 358BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, 359 OpClass __opClass, bool load, IntRegIndex dest, 360 IntRegIndex base, IntRegIndex offset, 361 ArmExtendType type, int64_t imm) : 362 PredMacroOp(mnem, machInst, __opClass) 363{ 364 numMicroops = 2; 365 microOps = new StaticInstPtr[numMicroops]; 366 367 if (load) { 368 microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, 369 offset, type, imm); 370 microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, 371 offset, type, imm); 372 } else { 373 microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, 374 offset, type, imm); 375 microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, 376 offset, type, imm); 377 } 378 379 microOps[0]->setDelayedCommit(); 380 microOps[1]->setLastMicroop(); 381} 382 383BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, 384 OpClass __opClass, IntRegIndex dest, 385 int64_t imm) : 386 PredMacroOp(mnem, machInst, __opClass) 387{ 388 numMicroops = 2; 389 microOps = new StaticInstPtr[numMicroops]; 390 391 microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); 392 microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); 393 394 microOps[0]->setDelayedCommit(); 395 microOps[1]->setLastMicroop(); 396} 397 398VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 399 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 400 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 401 PredMacroOp(mnem, machInst, __opClass) 402{ 403 assert(regs > 0 && regs <= 4); 404 assert(regs % elems == 0); 405 406 numMicroops = (regs > 2) ? 2 : 1; 407 bool wb = (rm != 15); 408 bool deinterleave = (elems > 1); 409 410 if (wb) numMicroops++; 411 if (deinterleave) numMicroops += (regs / elems); 412 microOps = new StaticInstPtr[numMicroops]; 413 414 RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; 415 416 uint32_t noAlign = TLB::MustBeOne; 417 418 unsigned uopIdx = 0; 419 switch (regs) { 420 case 4: 421 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 422 size, machInst, rMid, rn, 0, align); 423 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 424 size, machInst, rMid + 4, rn, 16, noAlign); 425 break; 426 case 3: 427 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 428 size, machInst, rMid, rn, 0, align); 429 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 430 size, machInst, rMid + 4, rn, 16, noAlign); 431 break; 432 case 2: 433 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>( 434 size, machInst, rMid, rn, 0, align); 435 break; 436 case 1: 437 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>( 438 size, machInst, rMid, rn, 0, align); 439 break; 440 default: 441 // Unknown number of registers 442 microOps[uopIdx++] = new Unknown(machInst); 443 } 444 if (wb) { 445 if (rm != 15 && rm != 13) { 446 microOps[uopIdx++] = 447 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 448 } else { 449 microOps[uopIdx++] = 450 new MicroAddiUop(machInst, rn, rn, regs * 8); 451 } 452 } 453 if (deinterleave) { 454 switch (elems) { 455 case 4: 456 assert(regs == 4); 457 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>( 458 size, machInst, vd * 2, rMid, inc * 2); 459 break; 460 case 3: 461 assert(regs == 3); 462 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>( 463 size, machInst, vd * 2, rMid, inc * 2); 464 break; 465 case 2: 466 assert(regs == 4 || regs == 2); 467 if (regs == 4) { 468 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 469 size, machInst, vd * 2, rMid, inc * 2); 470 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 471 size, machInst, vd * 2 + 2, rMid + 4, inc * 2); 472 } else { 473 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>( 474 size, machInst, vd * 2, rMid, inc * 2); 475 } 476 break; 477 default: 478 // Bad number of elements to deinterleave 479 microOps[uopIdx++] = new Unknown(machInst); 480 } 481 } 482 assert(uopIdx == numMicroops); 483 484 for (unsigned i = 0; i < numMicroops - 1; i++) { 485 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 486 assert(uopPtr); 487 uopPtr->setDelayedCommit(); 488 } 489 microOps[numMicroops - 1]->setLastMicroop(); 490} 491 492VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, 493 OpClass __opClass, bool all, unsigned elems, 494 RegIndex rn, RegIndex vd, unsigned regs, 495 unsigned inc, uint32_t size, uint32_t align, 496 RegIndex rm, unsigned lane) : 497 PredMacroOp(mnem, machInst, __opClass) 498{ 499 assert(regs > 0 && regs <= 4); 500 assert(regs % elems == 0); 501 502 unsigned eBytes = (1 << size); 503 unsigned loadSize = eBytes * elems; 504 unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) / 505 sizeof(FloatRegBits); 506 507 assert(loadRegs > 0 && loadRegs <= 4); 508 509 numMicroops = 1; 510 bool wb = (rm != 15); 511 512 if (wb) numMicroops++; 513 numMicroops += (regs / elems); 514 microOps = new StaticInstPtr[numMicroops]; 515 516 RegIndex ufp0 = NumFloatV7ArchRegs; 517 518 unsigned uopIdx = 0; 519 switch (loadSize) { 520 case 1: 521 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>( 522 machInst, ufp0, rn, 0, align); 523 break; 524 case 2: 525 if (eBytes == 2) { 526 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>( 527 machInst, ufp0, rn, 0, align); 528 } else { 529 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>( 530 machInst, ufp0, rn, 0, align); 531 } 532 break; 533 case 3: 534 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>( 535 machInst, ufp0, rn, 0, align); 536 break; 537 case 4: 538 switch (eBytes) { 539 case 1: 540 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>( 541 machInst, ufp0, rn, 0, align); 542 break; 543 case 2: 544 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>( 545 machInst, ufp0, rn, 0, align); 546 break; 547 case 4: 548 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>( 549 machInst, ufp0, rn, 0, align); 550 break; 551 } 552 break; 553 case 6: 554 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>( 555 machInst, ufp0, rn, 0, align); 556 break; 557 case 8: 558 switch (eBytes) { 559 case 2: 560 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>( 561 machInst, ufp0, rn, 0, align); 562 break; 563 case 4: 564 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>( 565 machInst, ufp0, rn, 0, align); 566 break; 567 } 568 break; 569 case 12: 570 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>( 571 machInst, ufp0, rn, 0, align); 572 break; 573 case 16: 574 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>( 575 machInst, ufp0, rn, 0, align); 576 break; 577 default: 578 // Unrecognized load size 579 microOps[uopIdx++] = new Unknown(machInst); 580 } 581 if (wb) { 582 if (rm != 15 && rm != 13) { 583 microOps[uopIdx++] = 584 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 585 } else { 586 microOps[uopIdx++] = 587 new MicroAddiUop(machInst, rn, rn, loadSize); 588 } 589 } 590 switch (elems) { 591 case 4: 592 assert(regs == 4); 593 switch (size) { 594 case 0: 595 if (all) { 596 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>( 597 machInst, vd * 2, ufp0, inc * 2); 598 } else { 599 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>( 600 machInst, vd * 2, ufp0, inc * 2, lane); 601 } 602 break; 603 case 1: 604 if (all) { 605 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>( 606 machInst, vd * 2, ufp0, inc * 2); 607 } else { 608 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>( 609 machInst, vd * 2, ufp0, inc * 2, lane); 610 } 611 break; 612 case 2: 613 if (all) { 614 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>( 615 machInst, vd * 2, ufp0, inc * 2); 616 } else { 617 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>( 618 machInst, vd * 2, ufp0, inc * 2, lane); 619 } 620 break; 621 default: 622 // Bad size 623 microOps[uopIdx++] = new Unknown(machInst); 624 break; 625 } 626 break; 627 case 3: 628 assert(regs == 3); 629 switch (size) { 630 case 0: 631 if (all) { 632 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>( 633 machInst, vd * 2, ufp0, inc * 2); 634 } else { 635 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>( 636 machInst, vd * 2, ufp0, inc * 2, lane); 637 } 638 break; 639 case 1: 640 if (all) { 641 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>( 642 machInst, vd * 2, ufp0, inc * 2); 643 } else { 644 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>( 645 machInst, vd * 2, ufp0, inc * 2, lane); 646 } 647 break; 648 case 2: 649 if (all) { 650 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>( 651 machInst, vd * 2, ufp0, inc * 2); 652 } else { 653 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>( 654 machInst, vd * 2, ufp0, inc * 2, lane); 655 } 656 break; 657 default: 658 // Bad size 659 microOps[uopIdx++] = new Unknown(machInst); 660 break; 661 } 662 break; 663 case 2: 664 assert(regs == 2); 665 assert(loadRegs <= 2); 666 switch (size) { 667 case 0: 668 if (all) { 669 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>( 670 machInst, vd * 2, ufp0, inc * 2); 671 } else { 672 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>( 673 machInst, vd * 2, ufp0, inc * 2, lane); 674 } 675 break; 676 case 1: 677 if (all) { 678 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>( 679 machInst, vd * 2, ufp0, inc * 2); 680 } else { 681 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>( 682 machInst, vd * 2, ufp0, inc * 2, lane); 683 } 684 break; 685 case 2: 686 if (all) { 687 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>( 688 machInst, vd * 2, ufp0, inc * 2); 689 } else { 690 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>( 691 machInst, vd * 2, ufp0, inc * 2, lane); 692 } 693 break; 694 default: 695 // Bad size 696 microOps[uopIdx++] = new Unknown(machInst); 697 break; 698 } 699 break; 700 case 1: 701 assert(regs == 1 || (all && regs == 2)); 702 assert(loadRegs <= 2); 703 for (unsigned offset = 0; offset < regs; offset++) { 704 switch (size) { 705 case 0: 706 if (all) { 707 microOps[uopIdx++] = 708 new MicroUnpackAllNeon2to2Uop<uint8_t>( 709 machInst, (vd + offset) * 2, ufp0, inc * 2); 710 } else { 711 microOps[uopIdx++] = 712 new MicroUnpackNeon2to2Uop<uint8_t>( 713 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 714 } 715 break; 716 case 1: 717 if (all) { 718 microOps[uopIdx++] = 719 new MicroUnpackAllNeon2to2Uop<uint16_t>( 720 machInst, (vd + offset) * 2, ufp0, inc * 2); 721 } else { 722 microOps[uopIdx++] = 723 new MicroUnpackNeon2to2Uop<uint16_t>( 724 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 725 } 726 break; 727 case 2: 728 if (all) { 729 microOps[uopIdx++] = 730 new MicroUnpackAllNeon2to2Uop<uint32_t>( 731 machInst, (vd + offset) * 2, ufp0, inc * 2); 732 } else { 733 microOps[uopIdx++] = 734 new MicroUnpackNeon2to2Uop<uint32_t>( 735 machInst, (vd + offset) * 2, ufp0, inc * 2, lane); 736 } 737 break; 738 default: 739 // Bad size 740 microOps[uopIdx++] = new Unknown(machInst); 741 break; 742 } 743 } 744 break; 745 default: 746 // Bad number of elements to unpack 747 microOps[uopIdx++] = new Unknown(machInst); 748 } 749 assert(uopIdx == numMicroops); 750 751 for (unsigned i = 0; i < numMicroops - 1; i++) { 752 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 753 assert(uopPtr); 754 uopPtr->setDelayedCommit(); 755 } 756 microOps[numMicroops - 1]->setLastMicroop(); 757} 758 759VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 760 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, 761 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : 762 PredMacroOp(mnem, machInst, __opClass) 763{ 764 assert(regs > 0 && regs <= 4); 765 assert(regs % elems == 0); 766 767 numMicroops = (regs > 2) ? 2 : 1; 768 bool wb = (rm != 15); 769 bool interleave = (elems > 1); 770 771 if (wb) numMicroops++; 772 if (interleave) numMicroops += (regs / elems); 773 microOps = new StaticInstPtr[numMicroops]; 774 775 uint32_t noAlign = TLB::MustBeOne; 776 777 RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; 778 779 unsigned uopIdx = 0; 780 if (interleave) { 781 switch (elems) { 782 case 4: 783 assert(regs == 4); 784 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>( 785 size, machInst, rMid, vd * 2, inc * 2); 786 break; 787 case 3: 788 assert(regs == 3); 789 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>( 790 size, machInst, rMid, vd * 2, inc * 2); 791 break; 792 case 2: 793 assert(regs == 4 || regs == 2); 794 if (regs == 4) { 795 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 796 size, machInst, rMid, vd * 2, inc * 2); 797 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 798 size, machInst, rMid + 4, vd * 2 + 2, inc * 2); 799 } else { 800 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>( 801 size, machInst, rMid, vd * 2, inc * 2); 802 } 803 break; 804 default: 805 // Bad number of elements to interleave 806 microOps[uopIdx++] = new Unknown(machInst); 807 } 808 } 809 switch (regs) { 810 case 4: 811 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 812 size, machInst, rMid, rn, 0, align); 813 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 814 size, machInst, rMid + 4, rn, 16, noAlign); 815 break; 816 case 3: 817 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 818 size, machInst, rMid, rn, 0, align); 819 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 820 size, machInst, rMid + 4, rn, 16, noAlign); 821 break; 822 case 2: 823 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>( 824 size, machInst, rMid, rn, 0, align); 825 break; 826 case 1: 827 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>( 828 size, machInst, rMid, rn, 0, align); 829 break; 830 default: 831 // Unknown number of registers 832 microOps[uopIdx++] = new Unknown(machInst); 833 } 834 if (wb) { 835 if (rm != 15 && rm != 13) { 836 microOps[uopIdx++] = 837 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 838 } else { 839 microOps[uopIdx++] = 840 new MicroAddiUop(machInst, rn, rn, regs * 8); 841 } 842 } 843 assert(uopIdx == numMicroops); 844 845 for (unsigned i = 0; i < numMicroops - 1; i++) { 846 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 847 assert(uopPtr); 848 uopPtr->setDelayedCommit(); 849 } 850 microOps[numMicroops - 1]->setLastMicroop(); 851} 852 853VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, 854 OpClass __opClass, bool all, unsigned elems, 855 RegIndex rn, RegIndex vd, unsigned regs, 856 unsigned inc, uint32_t size, uint32_t align, 857 RegIndex rm, unsigned lane) : 858 PredMacroOp(mnem, machInst, __opClass) 859{ 860 assert(!all); 861 assert(regs > 0 && regs <= 4); 862 assert(regs % elems == 0); 863 864 unsigned eBytes = (1 << size); 865 unsigned storeSize = eBytes * elems; 866 unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) / 867 sizeof(FloatRegBits); 868 869 assert(storeRegs > 0 && storeRegs <= 4); 870 871 numMicroops = 1; 872 bool wb = (rm != 15); 873 874 if (wb) numMicroops++; 875 numMicroops += (regs / elems); 876 microOps = new StaticInstPtr[numMicroops]; 877 878 RegIndex ufp0 = NumFloatV7ArchRegs; 879 880 unsigned uopIdx = 0; 881 switch (elems) { 882 case 4: 883 assert(regs == 4); 884 switch (size) { 885 case 0: 886 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>( 887 machInst, ufp0, vd * 2, inc * 2, lane); 888 break; 889 case 1: 890 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>( 891 machInst, ufp0, vd * 2, inc * 2, lane); 892 break; 893 case 2: 894 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>( 895 machInst, ufp0, vd * 2, inc * 2, lane); 896 break; 897 default: 898 // Bad size 899 microOps[uopIdx++] = new Unknown(machInst); 900 break; 901 } 902 break; 903 case 3: 904 assert(regs == 3); 905 switch (size) { 906 case 0: 907 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>( 908 machInst, ufp0, vd * 2, inc * 2, lane); 909 break; 910 case 1: 911 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>( 912 machInst, ufp0, vd * 2, inc * 2, lane); 913 break; 914 case 2: 915 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>( 916 machInst, ufp0, vd * 2, inc * 2, lane); 917 break; 918 default: 919 // Bad size 920 microOps[uopIdx++] = new Unknown(machInst); 921 break; 922 } 923 break; 924 case 2: 925 assert(regs == 2); 926 assert(storeRegs <= 2); 927 switch (size) { 928 case 0: 929 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>( 930 machInst, ufp0, vd * 2, inc * 2, lane); 931 break; 932 case 1: 933 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>( 934 machInst, ufp0, vd * 2, inc * 2, lane); 935 break; 936 case 2: 937 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>( 938 machInst, ufp0, vd * 2, inc * 2, lane); 939 break; 940 default: 941 // Bad size 942 microOps[uopIdx++] = new Unknown(machInst); 943 break; 944 } 945 break; 946 case 1: 947 assert(regs == 1 || (all && regs == 2)); 948 assert(storeRegs <= 2); 949 for (unsigned offset = 0; offset < regs; offset++) { 950 switch (size) { 951 case 0: 952 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>( 953 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 954 break; 955 case 1: 956 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>( 957 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 958 break; 959 case 2: 960 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>( 961 machInst, ufp0, (vd + offset) * 2, inc * 2, lane); 962 break; 963 default: 964 // Bad size 965 microOps[uopIdx++] = new Unknown(machInst); 966 break; 967 } 968 } 969 break; 970 default: 971 // Bad number of elements to unpack 972 microOps[uopIdx++] = new Unknown(machInst); 973 } 974 switch (storeSize) { 975 case 1: 976 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>( 977 machInst, ufp0, rn, 0, align); 978 break; 979 case 2: 980 if (eBytes == 2) { 981 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>( 982 machInst, ufp0, rn, 0, align); 983 } else { 984 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>( 985 machInst, ufp0, rn, 0, align); 986 } 987 break; 988 case 3: 989 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>( 990 machInst, ufp0, rn, 0, align); 991 break; 992 case 4: 993 switch (eBytes) { 994 case 1: 995 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>( 996 machInst, ufp0, rn, 0, align); 997 break; 998 case 2: 999 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>( 1000 machInst, ufp0, rn, 0, align); 1001 break; 1002 case 4: 1003 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>( 1004 machInst, ufp0, rn, 0, align); 1005 break; 1006 } 1007 break; 1008 case 6: 1009 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>( 1010 machInst, ufp0, rn, 0, align); 1011 break; 1012 case 8: 1013 switch (eBytes) { 1014 case 2: 1015 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>( 1016 machInst, ufp0, rn, 0, align); 1017 break; 1018 case 4: 1019 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>( 1020 machInst, ufp0, rn, 0, align); 1021 break; 1022 } 1023 break; 1024 case 12: 1025 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>( 1026 machInst, ufp0, rn, 0, align); 1027 break; 1028 case 16: 1029 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>( 1030 machInst, ufp0, rn, 0, align); 1031 break; 1032 default: 1033 // Bad store size 1034 microOps[uopIdx++] = new Unknown(machInst); 1035 } 1036 if (wb) { 1037 if (rm != 15 && rm != 13) { 1038 microOps[uopIdx++] = 1039 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL); 1040 } else { 1041 microOps[uopIdx++] = 1042 new MicroAddiUop(machInst, rn, rn, storeSize); 1043 } 1044 } 1045 assert(uopIdx == numMicroops); 1046 1047 for (unsigned i = 0; i < numMicroops - 1; i++) { 1048 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get()); 1049 assert(uopPtr); 1050 uopPtr->setDelayedCommit(); 1051 } 1052 microOps[numMicroops - 1]->setLastMicroop(); 1053} 1054 1055VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, 1056 OpClass __opClass, RegIndex rn, RegIndex vd, 1057 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1058 uint8_t numStructElems, uint8_t numRegs, bool wb) : 1059 PredMacroOp(mnem, machInst, __opClass) 1060{ 1061 RegIndex vx = NumFloatV8ArchRegs / 4; 1062 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1063 bool baseIsSP = isSP((IntRegIndex) rnsp); 1064 1065 numMicroops = wb ? 1 : 0; 1066 1067 int totNumBytes = numRegs * dataSize / 8; 1068 assert(totNumBytes <= 64); 1069 1070 // The guiding principle here is that no more than 16 bytes can be 1071 // transferred at a time 1072 int numMemMicroops = totNumBytes / 16; 1073 int residuum = totNumBytes % 16; 1074 if (residuum) 1075 ++numMemMicroops; 1076 numMicroops += numMemMicroops; 1077 1078 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); 1079 numMicroops += numMarshalMicroops; 1080 1081 microOps = new StaticInstPtr[numMicroops]; 1082 unsigned uopIdx = 0; 1083 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1084 TLB::AllowUnaligned; 1085 1086 int i = 0; 1087 for(; i < numMemMicroops - 1; ++i) { 1088 microOps[uopIdx++] = new MicroNeonLoad64( 1089 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1090 baseIsSP, 16 /* accSize */, eSize); 1091 } 1092 microOps[uopIdx++] = new MicroNeonLoad64( 1093 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1094 residuum ? residuum : 16 /* accSize */, eSize); 1095 1096 // Writeback microop: the post-increment amount is encoded in "Rm": a 1097 // 64-bit general register OR as '11111' for an immediate value equal to 1098 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1099 if (wb) { 1100 if (rm != ((RegIndex) INTREG_X31)) { 1101 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1102 UXTX, 0); 1103 } else { 1104 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1105 totNumBytes); 1106 } 1107 } 1108 1109 for (int i = 0; i < numMarshalMicroops; ++i) { 1110 microOps[uopIdx++] = new MicroDeintNeon64( 1111 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, 1112 numStructElems, numRegs, i /* step */); 1113 } 1114 1115 assert(uopIdx == numMicroops); 1116 1117 for (int i = 0; i < numMicroops - 1; ++i) { 1118 microOps[i]->setDelayedCommit(); 1119 } 1120 microOps[numMicroops - 1]->setLastMicroop(); 1121} 1122 1123VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, 1124 OpClass __opClass, RegIndex rn, RegIndex vd, 1125 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1126 uint8_t numStructElems, uint8_t numRegs, bool wb) : 1127 PredMacroOp(mnem, machInst, __opClass) 1128{ 1129 RegIndex vx = NumFloatV8ArchRegs / 4; 1130 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1131 bool baseIsSP = isSP((IntRegIndex) rnsp); 1132 1133 numMicroops = wb ? 1 : 0; 1134 1135 int totNumBytes = numRegs * dataSize / 8; 1136 assert(totNumBytes <= 64); 1137 1138 // The guiding principle here is that no more than 16 bytes can be 1139 // transferred at a time 1140 int numMemMicroops = totNumBytes / 16; 1141 int residuum = totNumBytes % 16; 1142 if (residuum) 1143 ++numMemMicroops; 1144 numMicroops += numMemMicroops; 1145 1146 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1147 numMicroops += numMarshalMicroops; 1148 1149 microOps = new StaticInstPtr[numMicroops]; 1150 unsigned uopIdx = 0; 1151 1152 for(int i = 0; i < numMarshalMicroops; ++i) { 1153 microOps[uopIdx++] = new MicroIntNeon64( 1154 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1155 numStructElems, numRegs, i /* step */); 1156 } 1157 1158 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1159 TLB::AllowUnaligned; 1160 1161 int i = 0; 1162 for(; i < numMemMicroops - 1; ++i) { 1163 microOps[uopIdx++] = new MicroNeonStore64( 1164 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1165 baseIsSP, 16 /* accSize */, eSize); 1166 } 1167 microOps[uopIdx++] = new MicroNeonStore64( 1168 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1169 residuum ? residuum : 16 /* accSize */, eSize); 1170 1171 // Writeback microop: the post-increment amount is encoded in "Rm": a 1172 // 64-bit general register OR as '11111' for an immediate value equal to 1173 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1174 if (wb) { 1175 if (rm != ((RegIndex) INTREG_X31)) { 1176 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1177 UXTX, 0); 1178 } else { 1179 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1180 totNumBytes); 1181 } 1182 } 1183 1184 assert(uopIdx == numMicroops); 1185 1186 for (int i = 0; i < numMicroops - 1; i++) { 1187 microOps[i]->setDelayedCommit(); 1188 } 1189 microOps[numMicroops - 1]->setLastMicroop(); 1190} 1191 1192VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, 1193 OpClass __opClass, RegIndex rn, RegIndex vd, 1194 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1195 uint8_t numStructElems, uint8_t index, bool wb, 1196 bool replicate) : 1197 PredMacroOp(mnem, machInst, __opClass) 1198{ 1199 RegIndex vx = NumFloatV8ArchRegs / 4; 1200 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1201 bool baseIsSP = isSP((IntRegIndex) rnsp); 1202 1203 numMicroops = wb ? 1 : 0; 1204 1205 int eSizeBytes = 1 << eSize; 1206 int totNumBytes = numStructElems * eSizeBytes; 1207 assert(totNumBytes <= 64); 1208 1209 // The guiding principle here is that no more than 16 bytes can be 1210 // transferred at a time 1211 int numMemMicroops = totNumBytes / 16; 1212 int residuum = totNumBytes % 16; 1213 if (residuum) 1214 ++numMemMicroops; 1215 numMicroops += numMemMicroops; 1216 1217 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); 1218 numMicroops += numMarshalMicroops; 1219 1220 microOps = new StaticInstPtr[numMicroops]; 1221 unsigned uopIdx = 0; 1222 1223 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1224 TLB::AllowUnaligned; 1225 1226 int i = 0; 1227 for (; i < numMemMicroops - 1; ++i) { 1228 microOps[uopIdx++] = new MicroNeonLoad64( 1229 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1230 baseIsSP, 16 /* accSize */, eSize); 1231 } 1232 microOps[uopIdx++] = new MicroNeonLoad64( 1233 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1234 residuum ? residuum : 16 /* accSize */, eSize); 1235 1236 // Writeback microop: the post-increment amount is encoded in "Rm": a 1237 // 64-bit general register OR as '11111' for an immediate value equal to 1238 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1239 if (wb) { 1240 if (rm != ((RegIndex) INTREG_X31)) { 1241 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1242 UXTX, 0); 1243 } else { 1244 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1245 totNumBytes); 1246 } 1247 } 1248 1249 for(int i = 0; i < numMarshalMicroops; ++i) { 1250 microOps[uopIdx++] = new MicroUnpackNeon64( 1251 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, 1252 numStructElems, index, i /* step */, replicate); 1253 } 1254 1255 assert(uopIdx == numMicroops); 1256 1257 for (int i = 0; i < numMicroops - 1; i++) { 1258 microOps[i]->setDelayedCommit(); 1259 } 1260 microOps[numMicroops - 1]->setLastMicroop(); 1261} 1262 1263VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, 1264 OpClass __opClass, RegIndex rn, RegIndex vd, 1265 RegIndex rm, uint8_t eSize, uint8_t dataSize, 1266 uint8_t numStructElems, uint8_t index, bool wb, 1267 bool replicate) : 1268 PredMacroOp(mnem, machInst, __opClass) 1269{ 1270 RegIndex vx = NumFloatV8ArchRegs / 4; 1271 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); 1272 bool baseIsSP = isSP((IntRegIndex) rnsp); 1273 1274 numMicroops = wb ? 1 : 0; 1275 1276 int eSizeBytes = 1 << eSize; 1277 int totNumBytes = numStructElems * eSizeBytes; 1278 assert(totNumBytes <= 64); 1279 1280 // The guiding principle here is that no more than 16 bytes can be 1281 // transferred at a time 1282 int numMemMicroops = totNumBytes / 16; 1283 int residuum = totNumBytes % 16; 1284 if (residuum) 1285 ++numMemMicroops; 1286 numMicroops += numMemMicroops; 1287 1288 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; 1289 numMicroops += numMarshalMicroops; 1290 1291 microOps = new StaticInstPtr[numMicroops]; 1292 unsigned uopIdx = 0; 1293 1294 for(int i = 0; i < numMarshalMicroops; ++i) { 1295 microOps[uopIdx++] = new MicroPackNeon64( 1296 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, 1297 numStructElems, index, i /* step */, replicate); 1298 } 1299 1300 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | 1301 TLB::AllowUnaligned; 1302 1303 int i = 0; 1304 for(; i < numMemMicroops - 1; ++i) { 1305 microOps[uopIdx++] = new MicroNeonStore64( 1306 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, 1307 baseIsSP, 16 /* accsize */, eSize); 1308 } 1309 microOps[uopIdx++] = new MicroNeonStore64( 1310 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, 1311 residuum ? residuum : 16 /* accSize */, eSize); 1312 1313 // Writeback microop: the post-increment amount is encoded in "Rm": a 1314 // 64-bit general register OR as '11111' for an immediate value equal to 1315 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) 1316 if (wb) { 1317 if (rm != ((RegIndex) INTREG_X31)) { 1318 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, 1319 UXTX, 0); 1320 } else { 1321 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, 1322 totNumBytes); 1323 } 1324 } 1325 1326 assert(uopIdx == numMicroops); 1327 1328 for (int i = 0; i < numMicroops - 1; i++) { 1329 microOps[i]->setDelayedCommit(); 1330 } 1331 microOps[numMicroops - 1]->setLastMicroop(); 1332} 1333 1334MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, 1335 OpClass __opClass, IntRegIndex rn, 1336 RegIndex vd, bool single, bool up, 1337 bool writeback, bool load, uint32_t offset) : 1338 PredMacroOp(mnem, machInst, __opClass) 1339{ 1340 int i = 0; 1341 1342 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem 1343 // to be functionally identical except that fldmx is deprecated. For now 1344 // we'll assume they're otherwise interchangable. 1345 int count = (single ? offset : (offset / 2)); 1346 if (count == 0 || count > NumFloatV7ArchRegs) 1347 warn_once("Bad offset field for VFP load/store multiple.\n"); 1348 if (count == 0) { 1349 // Force there to be at least one microop so the macroop makes sense. 1350 writeback = true; 1351 } 1352 if (count > NumFloatV7ArchRegs) 1353 count = NumFloatV7ArchRegs; 1354 1355 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); 1356 microOps = new StaticInstPtr[numMicroops]; 1357 1358 int64_t addr = 0; 1359 1360 if (!up) 1361 addr = 4 * offset; 1362 1363 bool tempUp = up; 1364 for (int j = 0; j < count; j++) { 1365 if (load) { 1366 if (single) { 1367 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, 1368 tempUp, addr); 1369 } else { 1370 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn, 1371 tempUp, addr); 1372 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp, 1373 addr + (up ? 4 : -4)); 1374 } 1375 } else { 1376 if (single) { 1377 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, 1378 tempUp, addr); 1379 } else { 1380 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn, 1381 tempUp, addr); 1382 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp, 1383 addr + (up ? 4 : -4)); 1384 } 1385 } 1386 if (!tempUp) { 1387 addr -= (single ? 4 : 8); 1388 // The microops don't handle negative displacement, so turn if we 1389 // hit zero, flip polarity and start adding. 1390 if (addr <= 0) { 1391 tempUp = true; 1392 addr = -addr; 1393 } 1394 } else { 1395 addr += (single ? 4 : 8); 1396 } 1397 } 1398 1399 if (writeback) { 1400 if (up) { 1401 microOps[i++] = 1402 new MicroAddiUop(machInst, rn, rn, 4 * offset); 1403 } else { 1404 microOps[i++] = 1405 new MicroSubiUop(machInst, rn, rn, 4 * offset); 1406 } 1407 } 1408 1409 assert(numMicroops == i); 1410 microOps[numMicroops - 1]->setLastMicroop(); 1411 1412 for (StaticInstPtr *curUop = microOps; 1413 !(*curUop)->isLastMicroop(); curUop++) { 1414 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); 1415 assert(uopPtr); 1416 uopPtr->setDelayedCommit(); 1417 } 1418} 1419 1420std::string 1421MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1422{ 1423 std::stringstream ss; 1424 printMnemonic(ss); 1425 printReg(ss, ura); 1426 ss << ", "; 1427 printReg(ss, urb); 1428 ss << ", "; 1429 ccprintf(ss, "#%d", imm); 1430 return ss.str(); 1431} 1432 1433std::string 1434MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1435{ 1436 std::stringstream ss; 1437 printMnemonic(ss); 1438 printReg(ss, ura); 1439 ss << ", "; 1440 printReg(ss, urb); 1441 ss << ", "; 1442 ccprintf(ss, "#%d", imm); 1443 return ss.str(); 1444} 1445 1446std::string 1447MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1448{ 1449 std::stringstream ss; 1450 printMnemonic(ss); 1451 ss << "[PC,CPSR]"; 1452 return ss.str(); 1453} 1454 1455std::string 1456MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1457{ 1458 std::stringstream ss; 1459 printMnemonic(ss); 1460 printReg(ss, ura); 1461 ccprintf(ss, ", "); 1462 printReg(ss, urb); 1463 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); 1464 return ss.str(); 1465} 1466 1467std::string 1468MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1469{ 1470 std::stringstream ss; 1471 printMnemonic(ss); 1472 printReg(ss, ura); 1473 ss << ", "; 1474 printReg(ss, urb); 1475 return ss.str(); 1476} 1477 1478std::string 1479MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1480{ 1481 std::stringstream ss; 1482 printMnemonic(ss); 1483 printReg(ss, ura); 1484 ss << ", "; 1485 printReg(ss, urb); 1486 ss << ", "; 1487 printReg(ss, urc); 1488 return ss.str(); 1489} 1490 1491std::string 1492MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const 1493{ 1494 std::stringstream ss; 1495 printMnemonic(ss); 1496 if (isFloating()) 1497 printReg(ss, ura + FP_Reg_Base); 1498 else 1499 printReg(ss, ura); 1500 ss << ", ["; 1501 printReg(ss, urb); 1502 ss << ", "; 1503 ccprintf(ss, "#%d", imm); 1504 ss << "]"; 1505 return ss.str(); 1506} 1507 1508}
|