neon.isa revision 7641
// -*- mode:c++ -*-

// Copyright (c) 2010 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

output header {{
    // Naming used by the decode helpers below:
    //   U / S / US  - unsigned element types, signed element types, or a
    //                 run-time choice between them via "notSigned".
    //   ...UReg     - all four element sizes (size 0..3 -> 8..64 bits).
    //   ...USReg    - only the three small sizes (size 0..2 -> 8..32 bits).
    //   ...Reg/SReg - additionally pick BaseQ or BaseD from the "q" flag.
    // Any size outside the supported range decodes to Unknown.

    // Three-register op, unsigned elements, sizes 0..3.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, op2);
          case 3:
            return new Base<uint64_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register op, signed elements, sizes 0..3.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, op2);
          case 3:
            return new Base<int64_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register op, signedness chosen by notSigned, sizes 0..3.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, unsigned elements, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register op, signed elements, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Three-register op, signedness chosen by notSigned, sizes 0..2.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeUSReg<Base>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUSReg<Base>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, unsigned, sizes 0..2, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonUThreeUSReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonUThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, signed, sizes 0..2, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonSThreeUSReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, signedness by notSigned, sizes 0..2, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, unsigned, sizes 0..3, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonUThreeUReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonUThreeUReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, signed, sizes 0..3, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonSThreeUReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register op, signedness by notSigned, sizes 0..3, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
    }

    // Two-register + immediate op, unsigned, sizes 0..3, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            switch (size) {
              case 0:
                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0:
                return new BaseD<uint8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseD<uint16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseD<uint32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseD<uint64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        }
    }

    // Two-register + immediate op, signed, sizes 0..3, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            switch (size) {
              case 0:
                return new BaseQ<int8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseQ<int16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseQ<int32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseQ<int64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0:
                return new BaseD<int8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseD<int16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseD<int32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseD<int64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        }
    }


    // Two-register + immediate op, signedness by notSigned, sizes 0..3.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (notSigned) {
            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
    }

    // Two-register + immediate op, unsigned, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, imm);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, imm);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register + immediate op, unsigned, sizes 0..2, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            return decodeNeonUTwoShiftUSReg<BaseQ>(
                    size, machInst, dest, op1, imm);
        } else {
            return decodeNeonUTwoShiftUSReg<BaseD>(
                    size, machInst, dest, op1, imm);
        }
    }

    // Two-register + immediate op, signed, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, imm);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, imm);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register + immediate op, signed, sizes 0..2, D/Q by q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            return decodeNeonSTwoShiftUSReg<BaseQ>(
                    size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftUSReg<BaseD>(
                    size, machInst, dest, op1, imm);
        }
    }

    // Two-register + immediate op, signedness by notSigned, sizes 0..2.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        if (notSigned) {
            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
    }

    // Two-register (no immediate) op, unsigned, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register (no immediate) op, signed, small sizes 0..2 only.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1);
          case 1:
            return new Base<int16_t>(machInst, dest, op1);
          case 2:
            return new Base<int32_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register op, unsigned, sizes 0..2, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (q) {
            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register op, signed, sizes 0..2, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (q) {
            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register (no immediate) op, unsigned, sizes 0..3.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1);
          case 3:
            return new Base<uint64_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register (no immediate) op, signed, sizes 0..3.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1);
          case 1:
            return new Base<int16_t>(machInst, dest, op1);
          case 2:
            return new Base<int32_t>(machInst, dest, op1);
          case 3:
            return new Base<int64_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Two-register op, signed, sizes 0..3, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        if (q) {
            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register op, unsigned, sizes 0..3, BaseQ if q else BaseD.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        if (q) {
            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register op, signedness by notSigned, sizes 0..2, D/Q by q.
    // This forwards to the TwoMisc helpers: the TwoShift helpers take an
    // extra immediate argument that this function does not have, so
    // calling them with five arguments could never be instantiated.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        if (notSigned) {
            return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        }
    }

}};

output exec {{
    // Floating point comparison helpers. Each returns 2.0 if either
    // operand is a signalling NaN (per isSnan), otherwise 0.0 when the
    // named comparison holds and 1.0 when it does not.
    // NOTE(review): the 0.0-means-true encoding is presumably decoded by
    // the instruction bodies that call these helpers - confirm there.

    static float
    vcgtFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }

    static float
    vcleFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }

    // Absolute-value variants: compare fabsf(op1) against fabsf(op2).
    static float
    vacgtFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
}};

let {{
    # C++ prologue prepended to every generated execute body below: return
    # disabledFault() unless neonEnabled(Cpacr, Cpsr, Fpexc) holds.
    simdEnabledCheckCode = '''
        if (!neonEnabled(Cpacr, Cpsr, Fpexc))
            return disabledFault();
    '''
}};

let {{

    header_output = ""
    exec_output = ""

    # Element type names used to instantiate the generated templates.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes

    # Generate a three operand instruction whose sources and destination
    # all span rCount registers.
    #   name, Name - passed to InstObjParams; Name is also the class name
    #                used for the per-type declarations.
    #   types      - element type names to emit NeonExecDeclare for.
    #   op         - C++ snippet computing destElem from srcElem1/srcElem2.
    #   readDest   - also load the old destination into destElem first.
    #   pairwise   - feed op with adjacent element pairs taken from the
    #                concatenation of srcReg1 and srcReg2 instead of the
    #                same-index elements of each source.
    def threeEqualRegInst(name, Name, types, rCount, op,
                          readDest=False, pairwise=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        if pairwise:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(srcReg1.elements[i]);
                Element srcElem2 = gtoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Floating point flavour of threeEqualRegInst: op works on whole
    # FloatReg values (srcReg1, srcReg2 -> destReg), one per register.
    #   toInt - the destination is held as raw bits (RegVect/FloatRegBits)
    #           instead of FloatReg.
    # Other parameters are as for threeEqualRegInst.
    def threeEqualRegInstFp(name, Name, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destReg = destRegs[r];'
            if toInt:
                # destRegs is a RegVect in this mode, so it has to be read
                # through .regs, mirroring the write below.
                readDestCode = 'destReg = destRegs.regs[r];'
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
        if toInt:
            destType = 'FloatRegBits'
            writeDest = 'destRegs.regs[r] = destReg;'
        if pairwise:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                FloatReg srcReg1 = (2 * r < rCount) ?
                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
                FloatReg srcReg2 = (2 * r < rCount) ?
                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        else:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                FloatReg srcReg1 = srcRegs1[r];
                FloatReg srcReg2 = srcRegs2[r];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Generate a three operand instruction whose operands may differ in
    # width. Each bigX flag makes that operand span four registers and use
    # the Big register/element types; otherwise it spans two registers.
    def threeUnequalRegInst(name, Name, types, op,
                            bigSrc1, bigSrc2, bigDest, readDest):
        global header_output, exec_output
        src1Cnt = src2Cnt = destCnt = 2
        src1Prefix = src2Prefix = destPrefix = ''
        if bigSrc1:
            src1Cnt = 4
            src1Prefix = 'Big'
        if bigSrc2:
            src2Cnt = 4
            src2Prefix = 'Big'
        if bigDest:
            destCnt = 4
            destPrefix = 'Big'
        eWalkCode = simdEnabledCheckCode + '''
            %sRegVect srcReg1;
            %sRegVect srcReg2;
            %sRegVect destReg;
        ''' % (src1Prefix, src2Prefix, destPrefix)
        for reg in range(src1Cnt):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        for reg in range(src2Cnt):
            eWalkCode += '''
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(destCnt):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        # srcElem2 takes its type from src2Prefix so that it matches the
        # declared type of srcReg2 even when only one source is "Big".
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
            %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
            %(destPrefix)sElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode,
                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                "destPrefix" : destPrefix }
        for reg in range(destCnt):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Narrowing form: both sources big, destination normal width.
    def threeRegNarrowInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            True, True, False, readDest)

    # Long form: both sources normal width, destination big.
    def threeRegLongInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            False, False, True, readDest)

    # Wide form: first source and destination big, second source normal.
    def threeRegWideInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            True, False, True, readDest)

    # Generate a register/register/immediate instruction in which every
    # element of the first source is combined with the single element of
    # the second source selected by imm. All operands span rCount
    # registers.
    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        assert(imm >= 0 && imm < eCount);
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Long variant of twoEqualRegInst: two-register sources, with a
    # BigRegVect destination spanning 2 * rCount (four) registers.
    def twoRegLongInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        rCount = 2
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2;
        BigRegVect destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(2 * rCount):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        assert(imm >= 0 && imm < eCount);
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(2 * rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Floating point flavour of twoEqualRegInst: each register of the
    # first source is combined with register imm of the second source.
    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2, destRegs;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destReg = destRegs[i];'
        eWalkCode += '''
        assert(imm >= 0 && imm < rCount);
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Generate a register/register/immediate instruction with one source
    # and a destination of the same width (rCount registers).
    #   fromInt - op reads the source as raw register bits (srcReg1)
    #             instead of as an element (srcElem1).
    #   toInt   - op writes raw register bits (destReg) instead of an
    #             element (destElem).
    def twoRegShiftInst(name, Name, types, rCount, op,
            readDest=False, toInt=False, fromInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcRegs1, destRegs;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
            if toInt:
                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
        if fromInt:
            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
        if toInt:
            declDest = 'FloatRegBits destReg;'
            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "readOp" : readOpCode,
                "declDest" : declDest,
                "readDest" : readDestCode,
                "op" : op,
                "writeDest" : writeDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Narrowing register/register/immediate instruction: a four-register
    # BigRegVect source is reduced into a two-register RegVect destination.
    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
        '''
        for reg in range(4):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(2):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(2):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Lengthening register/register/immediate instruction: a two-register
    # RegVect source is expanded into a four-register BigRegVect
    # destination.
    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg;
        '''
        for reg in range(2):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(4):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            # The old value must be loaded into destElem (the per-element
            # temporary declared in the loop), not into destReg, which is
            # the whole register vector.
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(4):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned j = i;
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[j] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
''' % { "reg" : reg } 1168 iop = InstObjParams(name, Name, 1169 "RegRegOp", 1170 { "code": eWalkCode, 1171 "r_count": rCount, 1172 "predicate_test": predicateTest }, []) 1173 header_output += NeonRegRegOpDeclare.subst(iop) 1174 exec_output += NeonEqualRegExecute.subst(iop) 1175 for type in types: 1176 substDict = { "targs" : type, 1177 "class_name" : Name } 1178 exec_output += NeonExecDeclare.subst(substDict) 1179 1180 def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False): 1181 global header_output, exec_output 1182 eWalkCode = simdEnabledCheckCode + ''' 1183 RegVect srcReg1, destReg; 1184 ''' 1185 for reg in range(rCount): 1186 eWalkCode += ''' 1187 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1188 ''' % { "reg" : reg } 1189 if readDest: 1190 eWalkCode += ''' 1191 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1192 ''' % { "reg" : reg } 1193 readDestCode = '' 1194 if readDest: 1195 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1196 eWalkCode += ''' 1197 for (unsigned i = 0; i < eCount; i++) { 1198 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1199 Element destElem; 1200 %(readDest)s 1201 %(op)s 1202 destReg.elements[i] = htog(destElem); 1203 } 1204 ''' % { "op" : op, "readDest" : readDestCode } 1205 for reg in range(rCount): 1206 eWalkCode += ''' 1207 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1208 ''' % { "reg" : reg } 1209 iop = InstObjParams(name, Name, 1210 "RegRegImmOp", 1211 { "code": eWalkCode, 1212 "r_count": rCount, 1213 "predicate_test": predicateTest }, []) 1214 header_output += NeonRegRegImmOpDeclare.subst(iop) 1215 exec_output += NeonEqualRegExecute.subst(iop) 1216 for type in types: 1217 substDict = { "targs" : type, 1218 "class_name" : Name } 1219 exec_output += NeonExecDeclare.subst(substDict) 1220 1221 def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False): 1222 global header_output, exec_output 1223 eWalkCode = simdEnabledCheckCode + ''' 1224 RegVect srcReg1, destReg; 1225 ''' 1226 for reg in 
range(rCount): 1227 eWalkCode += ''' 1228 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1229 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1230 ''' % { "reg" : reg } 1231 if readDest: 1232 eWalkCode += ''' 1233 ''' % { "reg" : reg } 1234 readDestCode = '' 1235 if readDest: 1236 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1237 eWalkCode += op 1238 for reg in range(rCount): 1239 eWalkCode += ''' 1240 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1241 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]); 1242 ''' % { "reg" : reg } 1243 iop = InstObjParams(name, Name, 1244 "RegRegOp", 1245 { "code": eWalkCode, 1246 "r_count": rCount, 1247 "predicate_test": predicateTest }, []) 1248 header_output += NeonRegRegOpDeclare.subst(iop) 1249 exec_output += NeonEqualRegExecute.subst(iop) 1250 for type in types: 1251 substDict = { "targs" : type, 1252 "class_name" : Name } 1253 exec_output += NeonExecDeclare.subst(substDict) 1254 1255 def twoRegMiscInstFp(name, Name, types, rCount, op, 1256 readDest=False, toInt=False): 1257 global header_output, exec_output 1258 eWalkCode = simdEnabledCheckCode + ''' 1259 typedef FloatReg FloatVect[rCount]; 1260 FloatVect srcRegs1; 1261 ''' 1262 if toInt: 1263 eWalkCode += 'RegVect destRegs;\n' 1264 else: 1265 eWalkCode += 'FloatVect destRegs;\n' 1266 for reg in range(rCount): 1267 eWalkCode += ''' 1268 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1269 ''' % { "reg" : reg } 1270 if readDest: 1271 if toInt: 1272 eWalkCode += ''' 1273 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1274 ''' % { "reg" : reg } 1275 else: 1276 eWalkCode += ''' 1277 destRegs[%(reg)d] = FpDestP%(reg)d; 1278 ''' % { "reg" : reg } 1279 readDestCode = '' 1280 if readDest: 1281 readDestCode = 'destReg = destRegs[i];' 1282 destType = 'FloatReg' 1283 writeDest = 'destRegs[r] = destReg;' 1284 if toInt: 1285 destType = 'FloatRegBits' 1286 writeDest = 'destRegs.regs[r] = destReg;' 1287 eWalkCode += ''' 1288 for (unsigned r = 0; r < rCount; r++) { 1289 FloatReg srcReg1 = 
srcRegs1[r]; 1290 %(destType)s destReg; 1291 %(readDest)s 1292 %(op)s 1293 %(writeDest)s 1294 } 1295 ''' % { "op" : op, 1296 "readDest" : readDestCode, 1297 "destType" : destType, 1298 "writeDest" : writeDest } 1299 for reg in range(rCount): 1300 if toInt: 1301 eWalkCode += ''' 1302 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d]; 1303 ''' % { "reg" : reg } 1304 else: 1305 eWalkCode += ''' 1306 FpDestP%(reg)d = destRegs[%(reg)d]; 1307 ''' % { "reg" : reg } 1308 iop = InstObjParams(name, Name, 1309 "FpRegRegOp", 1310 { "code": eWalkCode, 1311 "r_count": rCount, 1312 "predicate_test": predicateTest }, []) 1313 header_output += NeonRegRegOpDeclare.subst(iop) 1314 exec_output += NeonEqualRegExecute.subst(iop) 1315 for type in types: 1316 substDict = { "targs" : type, 1317 "class_name" : Name } 1318 exec_output += NeonExecDeclare.subst(substDict) 1319 1320 def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False): 1321 global header_output, exec_output 1322 eWalkCode = simdEnabledCheckCode + ''' 1323 RegVect srcRegs; 1324 BigRegVect destReg; 1325 ''' 1326 for reg in range(rCount): 1327 eWalkCode += ''' 1328 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1329 ''' % { "reg" : reg } 1330 if readDest: 1331 eWalkCode += ''' 1332 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1333 ''' % { "reg" : reg } 1334 readDestCode = '' 1335 if readDest: 1336 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1337 eWalkCode += ''' 1338 for (unsigned i = 0; i < eCount / 2; i++) { 1339 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1340 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1341 BigElement destElem; 1342 %(readDest)s 1343 %(op)s 1344 destReg.elements[i] = htog(destElem); 1345 } 1346 ''' % { "op" : op, "readDest" : readDestCode } 1347 for reg in range(rCount): 1348 eWalkCode += ''' 1349 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1350 ''' % { "reg" : reg } 1351 iop = InstObjParams(name, Name, 1352 "RegRegOp", 1353 { "code": eWalkCode, 1354 "r_count": 
rCount, 1355 "predicate_test": predicateTest }, []) 1356 header_output += NeonRegRegOpDeclare.subst(iop) 1357 exec_output += NeonUnequalRegExecute.subst(iop) 1358 for type in types: 1359 substDict = { "targs" : type, 1360 "class_name" : Name } 1361 exec_output += NeonExecDeclare.subst(substDict) 1362 1363 def twoRegNarrowMiscInst(name, Name, types, op, readDest=False): 1364 global header_output, exec_output 1365 eWalkCode = simdEnabledCheckCode + ''' 1366 BigRegVect srcReg1; 1367 RegVect destReg; 1368 ''' 1369 for reg in range(4): 1370 eWalkCode += ''' 1371 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1372 ''' % { "reg" : reg } 1373 if readDest: 1374 for reg in range(2): 1375 eWalkCode += ''' 1376 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1377 ''' % { "reg" : reg } 1378 readDestCode = '' 1379 if readDest: 1380 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1381 eWalkCode += ''' 1382 for (unsigned i = 0; i < eCount; i++) { 1383 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1384 Element destElem; 1385 %(readDest)s 1386 %(op)s 1387 destReg.elements[i] = htog(destElem); 1388 } 1389 ''' % { "op" : op, "readDest" : readDestCode } 1390 for reg in range(2): 1391 eWalkCode += ''' 1392 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1393 ''' % { "reg" : reg } 1394 iop = InstObjParams(name, Name, 1395 "RegRegOp", 1396 { "code": eWalkCode, 1397 "r_count": 2, 1398 "predicate_test": predicateTest }, []) 1399 header_output += NeonRegRegOpDeclare.subst(iop) 1400 exec_output += NeonUnequalRegExecute.subst(iop) 1401 for type in types: 1402 substDict = { "targs" : type, 1403 "class_name" : Name } 1404 exec_output += NeonExecDeclare.subst(substDict) 1405 1406 def oneRegImmInst(name, Name, types, rCount, op, readDest=False): 1407 global header_output, exec_output 1408 eWalkCode = simdEnabledCheckCode + ''' 1409 RegVect destReg; 1410 ''' 1411 if readDest: 1412 for reg in range(rCount): 1413 eWalkCode += ''' 1414 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1415 
''' % { "reg" : reg } 1416 readDestCode = '' 1417 if readDest: 1418 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1419 eWalkCode += ''' 1420 for (unsigned i = 0; i < eCount; i++) { 1421 Element destElem; 1422 %(readDest)s 1423 %(op)s 1424 destReg.elements[i] = htog(destElem); 1425 } 1426 ''' % { "op" : op, "readDest" : readDestCode } 1427 for reg in range(rCount): 1428 eWalkCode += ''' 1429 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1430 ''' % { "reg" : reg } 1431 iop = InstObjParams(name, Name, 1432 "RegImmOp", 1433 { "code": eWalkCode, 1434 "r_count": rCount, 1435 "predicate_test": predicateTest }, []) 1436 header_output += NeonRegImmOpDeclare.subst(iop) 1437 exec_output += NeonEqualRegExecute.subst(iop) 1438 for type in types: 1439 substDict = { "targs" : type, 1440 "class_name" : Name } 1441 exec_output += NeonExecDeclare.subst(substDict) 1442 1443 def twoRegLongMiscInst(name, Name, types, op, readDest=False): 1444 global header_output, exec_output 1445 eWalkCode = simdEnabledCheckCode + ''' 1446 RegVect srcReg1; 1447 BigRegVect destReg; 1448 ''' 1449 for reg in range(2): 1450 eWalkCode += ''' 1451 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1452 ''' % { "reg" : reg } 1453 if readDest: 1454 for reg in range(4): 1455 eWalkCode += ''' 1456 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1457 ''' % { "reg" : reg } 1458 readDestCode = '' 1459 if readDest: 1460 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1461 eWalkCode += ''' 1462 for (unsigned i = 0; i < eCount; i++) { 1463 Element srcElem1 = gtoh(srcReg1.elements[i]); 1464 BigElement destElem; 1465 %(readDest)s 1466 %(op)s 1467 destReg.elements[i] = htog(destElem); 1468 } 1469 ''' % { "op" : op, "readDest" : readDestCode } 1470 for reg in range(4): 1471 eWalkCode += ''' 1472 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1473 ''' % { "reg" : reg } 1474 iop = InstObjParams(name, Name, 1475 "RegRegOp", 1476 { "code": eWalkCode, 1477 "r_count": 2, 1478 "predicate_test": predicateTest 
}, []) 1479 header_output += NeonRegRegOpDeclare.subst(iop) 1480 exec_output += NeonUnequalRegExecute.subst(iop) 1481 for type in types: 1482 substDict = { "targs" : type, 1483 "class_name" : Name } 1484 exec_output += NeonExecDeclare.subst(substDict) 1485 1486 vhaddCode = ''' 1487 Element carryBit = 1488 (((unsigned)srcElem1 & 0x1) + 1489 ((unsigned)srcElem2 & 0x1)) >> 1; 1490 // Use division instead of a shift to ensure the sign extension works 1491 // right. The compiler will figure out if it can be a shift. Mask the 1492 // inputs so they get truncated correctly. 1493 destElem = (((srcElem1 & ~(Element)1) / 2) + 1494 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1495 ''' 1496 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode) 1497 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode) 1498 1499 vrhaddCode = ''' 1500 Element carryBit = 1501 (((unsigned)srcElem1 & 0x1) + 1502 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 1503 // Use division instead of a shift to ensure the sign extension works 1504 // right. The compiler will figure out if it can be a shift. Mask the 1505 // inputs so they get truncated correctly. 1506 destElem = (((srcElem1 & ~(Element)1) / 2) + 1507 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1508 ''' 1509 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode) 1510 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode) 1511 1512 vhsubCode = ''' 1513 Element barrowBit = 1514 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 1515 // Use division instead of a shift to ensure the sign extension works 1516 // right. The compiler will figure out if it can be a shift. Mask the 1517 // inputs so they get truncated correctly. 
1518 destElem = (((srcElem1 & ~(Element)1) / 2) - 1519 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 1520 ''' 1521 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode) 1522 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode) 1523 1524 vandCode = ''' 1525 destElem = srcElem1 & srcElem2; 1526 ''' 1527 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode) 1528 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode) 1529 1530 vbicCode = ''' 1531 destElem = srcElem1 & ~srcElem2; 1532 ''' 1533 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode) 1534 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode) 1535 1536 vorrCode = ''' 1537 destElem = srcElem1 | srcElem2; 1538 ''' 1539 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode) 1540 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode) 1541 1542 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode) 1543 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode) 1544 1545 vornCode = ''' 1546 destElem = srcElem1 | ~srcElem2; 1547 ''' 1548 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode) 1549 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode) 1550 1551 veorCode = ''' 1552 destElem = srcElem1 ^ srcElem2; 1553 ''' 1554 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode) 1555 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode) 1556 1557 vbifCode = ''' 1558 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 1559 ''' 1560 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True) 1561 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True) 1562 vbitCode = ''' 1563 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 1564 ''' 1565 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True) 1566 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True) 1567 vbslCode = ''' 1568 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 1569 ''' 
# (class-name suffix, rCount) for the D and Q form of each instruction; the
# D form is always emitted first.
neonDQForms = (("D", 2), ("Q", 4))

# Uses vbslCode, which is assigned immediately before these calls.
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vbsl", "Vbsl" + dqSuffix, unsignedTypes,
                      dqRegs, vbslCode, True)

vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vmax", "Vmax" + dqSuffix, allTypes,
                      dqRegs, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vmin", "Vmin" + dqSuffix, allTypes,
                      dqRegs, vminCode)

vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vadd", "NVadd" + dqSuffix, unsignedTypes,
                      dqRegs, vaddCode)

# The pairwise add reuses the plain add snippet.
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vpadd", "NVpadd" + dqSuffix, unsignedTypes,
                      dqRegs, vaddCode, pairwise=True)
# Operands are widened to BigElement before adding.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
# Keeps the upper sizeof(Element) * 8 bits of the widened sum.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
# As vaddhnCode, with half of the discarded range added before the shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)

vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vsub", "NVsub" + dqSuffix, unsignedTypes,
                      dqRegs, vsubCode)
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)

# Unsigned add; on wrap-around the result becomes all ones and fpscr.qc is
# set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqadd", "VqaddU" + dqSuffix, unsignedTypes,
                      dqRegs, vqaddUCode)
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)

# Signed add; when both inputs share a sign that the result does not, the
# result is replaced by the extreme value of the inputs' sign and fpscr.qc
# is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqadd", "VqaddS" + dqSuffix, signedTypes,
                      dqRegs, vqaddSCode)

# Unsigned subtract; on wrap-around the result becomes 0 and fpscr.qc is
# set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqsub", "VqsubU" + dqSuffix, unsignedTypes,
                      dqRegs, vqsubUCode)

# Signed subtract with the same saturation scheme as vqaddSCode.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqsub", "VqsubS" + dqSuffix, signedTypes,
                      dqRegs, vqsubSCode)

# The comparisons produce an all-ones element when true and 0 otherwise.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vcgt", "Vcgt" + dqSuffix, allTypes,
                      dqRegs, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vcge", "Vcge" + dqSuffix, allTypes,
                      dqRegs, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vceq", "Vceq" + dqSuffix, unsignedTypes,
                      dqRegs, vceqCode)

# In all shift snippets the shift amount is the low byte of srcElem2 read as
# a signed value; a negative amount shifts right.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vshl", "Vshl" + dqSuffix, allTypes,
                      dqRegs, vshlCode)

# As vshlCode, but a right shift adds rBit, taken from the last bit shifted
# out.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vrshl", "Vrshl" + dqSuffix, allTypes,
                      dqRegs, vrshlCode)

# Unsigned shift; a left shift that would drop set bits yields an all-ones
# element and sets fpscr.qc.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqshl", "VqshlU" + dqSuffix, unsignedTypes,
                      dqRegs, vqshlUCode)

# Signed counterpart of vqshlUCode; saturates to the extreme value matching
# the sign of srcElem1.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqshl", "VqshlS" + dqSuffix, signedTypes,
                      dqRegs, vqshlSCode)

# Unsigned rounding, saturating shift.  A zero shift amount gets its own
# branch, as in the sibling vqshl/vqrshl snippets, so that the overflow test
# never asks bits() for the empty range (N - 1, N).
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqrshl", "VqrshlU" + dqSuffix, unsignedTypes,
                      dqRegs, vqrshlUCode)

# Signed rounding, saturating shift.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vqrshl", "VqrshlS" + dqSuffix, signedTypes,
                      dqRegs, vqrshlSCode)

# Accumulates the absolute difference into the old destination value, hence
# the trailing True argument.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vaba", "Vaba" + dqSuffix, allTypes,
                      dqRegs, vabaCode, True)
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)

vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vabd", "Vabd" + dqSuffix, allTypes,
                      dqRegs, vabdCode)
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)

vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vtst", "Vtst" + dqSuffix, unsignedTypes,
                      dqRegs, vtstCode)

vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vmul", "NVmul" + dqSuffix, allTypes,
                      dqRegs, vmulCode)
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)

vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in neonDQForms:
    threeEqualRegInst("vmla", "NVmla" + dqSuffix, allTypes,
                      dqRegs, vmlaCode, True)
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)

# Doubled widening product accumulated into destElem; both the product and
# the accumulation saturate and set fpscr.qc.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
# Uses vqdmlalCode, which is assigned immediately before this call.
threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)

# Doubled widening product subtracted from destElem; both the product and
# the subtraction saturate and set fpscr.qc.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)

# Doubled widening product; when both inputs equal the most negative Element
# the result is replaced and fpscr.qc is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)

# Multiply-subtract reads the old destination value, hence the trailing True
# argument.
vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmls", "NVmls" + dqSuffix, allTypes,
                      dqRegs, vmlsCode, True)
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)

# XOR-accumulates srcElem1 shifted left by j for every set bit j of
# srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode) 2075 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode) 2076 vmullpCode = ''' 2077 destElem = 0; 2078 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2079 if (bits(srcElem2, j)) 2080 destElem ^= (BigElement)srcElem1 << j; 2081 } 2082 ''' 2083 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode) 2084 2085 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True) 2086 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True) 2087 2088 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True) 2089 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True) 2090 2091 vqdmulhCode = ''' 2092 FPSCR fpscr = (FPSCR)Fpscr; 2093 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2094 (sizeof(Element) * 8); 2095 if (srcElem1 == srcElem2 && 2096 srcElem1 == (Element)((Element)1 << 2097 (sizeof(Element) * 8 - 1))) { 2098 destElem = ~srcElem1; 2099 fpscr.qc = 1; 2100 } 2101 Fpscr = fpscr; 2102 ''' 2103 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode) 2104 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode) 2105 2106 vqrdmulhCode = ''' 2107 FPSCR fpscr = (FPSCR)Fpscr; 2108 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + 2109 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2110 (sizeof(Element) * 8); 2111 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); 2112 Element halfNeg = maxNeg / 2; 2113 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2114 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2115 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2116 if (destElem < 0) { 2117 destElem = mask(sizeof(Element) * 8 - 1); 2118 } else { 2119 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2120 } 2121 fpscr.qc = 1; 2122 } 2123 Fpscr = fpscr; 2124 ''' 2125 threeEqualRegInst("vqrdmulh", "VqrdmulhD", 2126 smallSignedTypes, 2, vqrdmulhCode) 2127 
threeEqualRegInst("vqrdmulh", "VqrdmulhQ", 2128 smallSignedTypes, 4, vqrdmulhCode) 2129 2130 vmaxfpCode = ''' 2131 FPSCR fpscr = (FPSCR)Fpscr; 2132 bool done; 2133 destReg = processNans(fpscr, done, true, srcReg1, srcReg2); 2134 if (!done) { 2135 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS, 2136 true, true, VfpRoundNearest); 2137 } else if (flushToZero(srcReg1, srcReg2)) { 2138 fpscr.idc = 1; 2139 } 2140 Fpscr = fpscr; 2141 ''' 2142 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode) 2143 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode) 2144 2145 vminfpCode = ''' 2146 FPSCR fpscr = (FPSCR)Fpscr; 2147 bool done; 2148 destReg = processNans(fpscr, done, true, srcReg1, srcReg2); 2149 if (!done) { 2150 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS, 2151 true, true, VfpRoundNearest); 2152 } else if (flushToZero(srcReg1, srcReg2)) { 2153 fpscr.idc = 1; 2154 } 2155 Fpscr = fpscr; 2156 ''' 2157 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode) 2158 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode) 2159 2160 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",), 2161 2, vmaxfpCode, pairwise=True) 2162 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",), 2163 4, vmaxfpCode, pairwise=True) 2164 2165 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",), 2166 2, vminfpCode, pairwise=True) 2167 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",), 2168 4, vminfpCode, pairwise=True) 2169 2170 vaddfpCode = ''' 2171 FPSCR fpscr = Fpscr; 2172 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, 2173 true, true, VfpRoundNearest); 2174 Fpscr = fpscr; 2175 ''' 2176 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode) 2177 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode) 2178 2179 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",), 2180 2, vaddfpCode, pairwise=True) 2181 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",), 2182 4, vaddfpCode, pairwise=True) 2183 2184 vsubfpCode = ''' 2185 
FPSCR fpscr = Fpscr; 2186 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, 2187 true, true, VfpRoundNearest); 2188 Fpscr = fpscr; 2189 ''' 2190 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode) 2191 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode) 2192 2193 vmulfpCode = ''' 2194 FPSCR fpscr = Fpscr; 2195 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, 2196 true, true, VfpRoundNearest); 2197 Fpscr = fpscr; 2198 ''' 2199 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode) 2200 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode) 2201 2202 vmlafpCode = ''' 2203 FPSCR fpscr = Fpscr; 2204 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, 2205 true, true, VfpRoundNearest); 2206 destReg = binaryOp(fpscr, mid, destReg, fpAddS, 2207 true, true, VfpRoundNearest); 2208 Fpscr = fpscr; 2209 ''' 2210 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True) 2211 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True) 2212 2213 vmlsfpCode = ''' 2214 FPSCR fpscr = Fpscr; 2215 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, 2216 true, true, VfpRoundNearest); 2217 destReg = binaryOp(fpscr, destReg, mid, fpSubS, 2218 true, true, VfpRoundNearest); 2219 Fpscr = fpscr; 2220 ''' 2221 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True) 2222 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True) 2223 2224 vcgtfpCode = ''' 2225 FPSCR fpscr = (FPSCR)Fpscr; 2226 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc, 2227 true, true, VfpRoundNearest); 2228 destReg = (res == 0) ? 
-1 : 0; 2229 if (res == 2.0) 2230 fpscr.ioc = 1; 2231 Fpscr = fpscr; 2232 ''' 2233 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",), 2234 2, vcgtfpCode, toInt = True) 2235 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",), 2236 4, vcgtfpCode, toInt = True) 2237 2238 vcgefpCode = ''' 2239 FPSCR fpscr = (FPSCR)Fpscr; 2240 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc, 2241 true, true, VfpRoundNearest); 2242 destReg = (res == 0) ? -1 : 0; 2243 if (res == 2.0) 2244 fpscr.ioc = 1; 2245 Fpscr = fpscr; 2246 ''' 2247 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",), 2248 2, vcgefpCode, toInt = True) 2249 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",), 2250 4, vcgefpCode, toInt = True) 2251 2252 vacgtfpCode = ''' 2253 FPSCR fpscr = (FPSCR)Fpscr; 2254 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc, 2255 true, true, VfpRoundNearest); 2256 destReg = (res == 0) ? -1 : 0; 2257 if (res == 2.0) 2258 fpscr.ioc = 1; 2259 Fpscr = fpscr; 2260 ''' 2261 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",), 2262 2, vacgtfpCode, toInt = True) 2263 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",), 2264 4, vacgtfpCode, toInt = True) 2265 2266 vacgefpCode = ''' 2267 FPSCR fpscr = (FPSCR)Fpscr; 2268 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc, 2269 true, true, VfpRoundNearest); 2270 destReg = (res == 0) ? -1 : 0; 2271 if (res == 2.0) 2272 fpscr.ioc = 1; 2273 Fpscr = fpscr; 2274 ''' 2275 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",), 2276 2, vacgefpCode, toInt = True) 2277 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",), 2278 4, vacgefpCode, toInt = True) 2279 2280 vceqfpCode = ''' 2281 FPSCR fpscr = (FPSCR)Fpscr; 2282 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc, 2283 true, true, VfpRoundNearest); 2284 destReg = (res == 0) ? 
-1 : 0; 2285 if (res == 2.0) 2286 fpscr.ioc = 1; 2287 Fpscr = fpscr; 2288 ''' 2289 threeEqualRegInstFp("vceq", "VceqDFp", ("float",), 2290 2, vceqfpCode, toInt = True) 2291 threeEqualRegInstFp("vceq", "VceqQFp", ("float",), 2292 4, vceqfpCode, toInt = True) 2293 2294 vrecpsCode = ''' 2295 FPSCR fpscr = Fpscr; 2296 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, 2297 true, true, VfpRoundNearest); 2298 Fpscr = fpscr; 2299 ''' 2300 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode) 2301 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode) 2302 2303 vrsqrtsCode = ''' 2304 FPSCR fpscr = Fpscr; 2305 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, 2306 true, true, VfpRoundNearest); 2307 Fpscr = fpscr; 2308 ''' 2309 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode) 2310 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode) 2311 2312 vabdfpCode = ''' 2313 FPSCR fpscr = Fpscr; 2314 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, 2315 true, true, VfpRoundNearest); 2316 destReg = fabs(mid); 2317 Fpscr = fpscr; 2318 ''' 2319 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode) 2320 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode) 2321 2322 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True) 2323 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True) 2324 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True) 2325 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True) 2326 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True) 2327 2328 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True) 2329 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True) 2330 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True) 2331 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True) 2332 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True) 2333 
2334 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode) 2335 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode) 2336 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode) 2337 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode) 2338 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode) 2339 2340 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode) 2341 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True) 2342 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True) 2343 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode) 2344 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode) 2345 twoEqualRegInst("vqrdmulh", "VqrdmulhsD", 2346 smallSignedTypes, 2, vqrdmulhCode) 2347 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", 2348 smallSignedTypes, 4, vqrdmulhCode) 2349 2350 vshrCode = ''' 2351 if (imm >= sizeof(srcElem1) * 8) { 2352 if (ltz(srcElem1)) 2353 destElem = -1; 2354 else 2355 destElem = 0; 2356 } else { 2357 destElem = srcElem1 >> imm; 2358 } 2359 ''' 2360 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode) 2361 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode) 2362 2363 vsraCode = ''' 2364 Element mid;; 2365 if (imm >= sizeof(srcElem1) * 8) { 2366 mid = ltz(srcElem1) ? 
-1 : 0; 2367 } else { 2368 mid = srcElem1 >> imm; 2369 if (ltz(srcElem1) && !ltz(mid)) { 2370 mid |= -(mid & ((Element)1 << 2371 (sizeof(Element) * 8 - 1 - imm))); 2372 } 2373 } 2374 destElem += mid; 2375 ''' 2376 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True) 2377 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True) 2378 2379 vrshrCode = ''' 2380 if (imm > sizeof(srcElem1) * 8) { 2381 destElem = 0; 2382 } else if (imm) { 2383 Element rBit = bits(srcElem1, imm - 1); 2384 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2385 } else { 2386 destElem = srcElem1; 2387 } 2388 ''' 2389 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode) 2390 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode) 2391 2392 vrsraCode = ''' 2393 if (imm > sizeof(srcElem1) * 8) { 2394 destElem += 0; 2395 } else if (imm) { 2396 Element rBit = bits(srcElem1, imm - 1); 2397 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2398 } else { 2399 destElem += srcElem1; 2400 } 2401 ''' 2402 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True) 2403 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True) 2404 2405 vsriCode = ''' 2406 if (imm >= sizeof(Element) * 8) 2407 destElem = destElem; 2408 else 2409 destElem = (srcElem1 >> imm) | 2410 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2411 ''' 2412 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True) 2413 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True) 2414 2415 vshlCode = ''' 2416 if (imm >= sizeof(Element) * 8) 2417 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; 2418 else 2419 destElem = srcElem1 << imm; 2420 ''' 2421 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode) 2422 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode) 2423 2424 vsliCode = ''' 2425 if (imm >= sizeof(Element) * 8) 2426 destElem = destElem; 2427 else 2428 destElem = (srcElem1 << imm) | (destElem & mask(imm)); 2429 ''' 2430 twoRegShiftInst("vsli", 
"NVsliD", unsignedTypes, 2, vsliCode, True) 2431 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True) 2432 2433 vqshlCode = ''' 2434 FPSCR fpscr = (FPSCR)Fpscr; 2435 if (imm >= sizeof(Element) * 8) { 2436 if (srcElem1 != 0) { 2437 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2438 if (srcElem1 > 0) 2439 destElem = ~destElem; 2440 fpscr.qc = 1; 2441 } else { 2442 destElem = 0; 2443 } 2444 } else if (imm) { 2445 destElem = (srcElem1 << imm); 2446 uint64_t topBits = bits((uint64_t)srcElem1, 2447 sizeof(Element) * 8 - 1, 2448 sizeof(Element) * 8 - 1 - imm); 2449 if (topBits != 0 && topBits != mask(imm + 1)) { 2450 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2451 if (srcElem1 > 0) 2452 destElem = ~destElem; 2453 fpscr.qc = 1; 2454 } 2455 } else { 2456 destElem = srcElem1; 2457 } 2458 Fpscr = fpscr; 2459 ''' 2460 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode) 2461 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode) 2462 2463 vqshluCode = ''' 2464 FPSCR fpscr = (FPSCR)Fpscr; 2465 if (imm >= sizeof(Element) * 8) { 2466 if (srcElem1 != 0) { 2467 destElem = mask(sizeof(Element) * 8); 2468 fpscr.qc = 1; 2469 } else { 2470 destElem = 0; 2471 } 2472 } else if (imm) { 2473 destElem = (srcElem1 << imm); 2474 uint64_t topBits = bits((uint64_t)srcElem1, 2475 sizeof(Element) * 8 - 1, 2476 sizeof(Element) * 8 - imm); 2477 if (topBits != 0) { 2478 destElem = mask(sizeof(Element) * 8); 2479 fpscr.qc = 1; 2480 } 2481 } else { 2482 destElem = srcElem1; 2483 } 2484 Fpscr = fpscr; 2485 ''' 2486 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode) 2487 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode) 2488 2489 vqshlusCode = ''' 2490 FPSCR fpscr = (FPSCR)Fpscr; 2491 if (imm >= sizeof(Element) * 8) { 2492 if (srcElem1 < 0) { 2493 destElem = 0; 2494 fpscr.qc = 1; 2495 } else if (srcElem1 > 0) { 2496 destElem = mask(sizeof(Element) * 8); 2497 fpscr.qc = 1; 2498 } else { 2499 destElem = 0; 2500 } 2501 } 
else if (imm) { 2502 destElem = (srcElem1 << imm); 2503 uint64_t topBits = bits((uint64_t)srcElem1, 2504 sizeof(Element) * 8 - 1, 2505 sizeof(Element) * 8 - imm); 2506 if (srcElem1 < 0) { 2507 destElem = 0; 2508 fpscr.qc = 1; 2509 } else if (topBits != 0) { 2510 destElem = mask(sizeof(Element) * 8); 2511 fpscr.qc = 1; 2512 } 2513 } else { 2514 if (srcElem1 < 0) { 2515 fpscr.qc = 1; 2516 destElem = 0; 2517 } else { 2518 destElem = srcElem1; 2519 } 2520 } 2521 Fpscr = fpscr; 2522 ''' 2523 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode) 2524 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode) 2525 2526 vshrnCode = ''' 2527 if (imm >= sizeof(srcElem1) * 8) { 2528 destElem = 0; 2529 } else { 2530 destElem = srcElem1 >> imm; 2531 } 2532 ''' 2533 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode) 2534 2535 vrshrnCode = ''' 2536 if (imm > sizeof(srcElem1) * 8) { 2537 destElem = 0; 2538 } else if (imm) { 2539 Element rBit = bits(srcElem1, imm - 1); 2540 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2541 } else { 2542 destElem = srcElem1; 2543 } 2544 ''' 2545 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode) 2546 2547 vqshrnCode = ''' 2548 FPSCR fpscr = (FPSCR)Fpscr; 2549 if (imm > sizeof(srcElem1) * 8) { 2550 if (srcElem1 != 0 && srcElem1 != -1) 2551 fpscr.qc = 1; 2552 destElem = 0; 2553 } else if (imm) { 2554 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2555 mid |= -(mid & ((BigElement)1 << 2556 (sizeof(BigElement) * 8 - 1 - imm))); 2557 if (mid != (Element)mid) { 2558 destElem = mask(sizeof(Element) * 8 - 1); 2559 if (srcElem1 < 0) 2560 destElem = ~destElem; 2561 fpscr.qc = 1; 2562 } else { 2563 destElem = mid; 2564 } 2565 } else { 2566 destElem = srcElem1; 2567 } 2568 Fpscr = fpscr; 2569 ''' 2570 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode) 2571 2572 vqshrunCode = ''' 2573 FPSCR fpscr = (FPSCR)Fpscr; 2574 if (imm > sizeof(srcElem1) * 8) { 2575 if 
(srcElem1 != 0) 2576 fpscr.qc = 1; 2577 destElem = 0; 2578 } else if (imm) { 2579 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2580 if (mid != (Element)mid) { 2581 destElem = mask(sizeof(Element) * 8); 2582 fpscr.qc = 1; 2583 } else { 2584 destElem = mid; 2585 } 2586 } else { 2587 destElem = srcElem1; 2588 } 2589 Fpscr = fpscr; 2590 ''' 2591 twoRegNarrowShiftInst("vqshrun", "NVqshrun", 2592 smallUnsignedTypes, vqshrunCode) 2593 2594 vqshrunsCode = ''' 2595 FPSCR fpscr = (FPSCR)Fpscr; 2596 if (imm > sizeof(srcElem1) * 8) { 2597 if (srcElem1 != 0) 2598 fpscr.qc = 1; 2599 destElem = 0; 2600 } else if (imm) { 2601 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2602 if (bits(mid, sizeof(BigElement) * 8 - 1, 2603 sizeof(Element) * 8) != 0) { 2604 if (srcElem1 < 0) { 2605 destElem = 0; 2606 } else { 2607 destElem = mask(sizeof(Element) * 8); 2608 } 2609 fpscr.qc = 1; 2610 } else { 2611 destElem = mid; 2612 } 2613 } else { 2614 destElem = srcElem1; 2615 } 2616 Fpscr = fpscr; 2617 ''' 2618 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 2619 smallSignedTypes, vqshrunsCode) 2620 2621 vqrshrnCode = ''' 2622 FPSCR fpscr = (FPSCR)Fpscr; 2623 if (imm > sizeof(srcElem1) * 8) { 2624 if (srcElem1 != 0 && srcElem1 != -1) 2625 fpscr.qc = 1; 2626 destElem = 0; 2627 } else if (imm) { 2628 BigElement mid = (srcElem1 >> (imm - 1)); 2629 uint64_t rBit = mid & 0x1; 2630 mid >>= 1; 2631 mid |= -(mid & ((BigElement)1 << 2632 (sizeof(BigElement) * 8 - 1 - imm))); 2633 mid += rBit; 2634 if (mid != (Element)mid) { 2635 destElem = mask(sizeof(Element) * 8 - 1); 2636 if (srcElem1 < 0) 2637 destElem = ~destElem; 2638 fpscr.qc = 1; 2639 } else { 2640 destElem = mid; 2641 } 2642 } else { 2643 if (srcElem1 != (Element)srcElem1) { 2644 destElem = mask(sizeof(Element) * 8 - 1); 2645 if (srcElem1 < 0) 2646 destElem = ~destElem; 2647 fpscr.qc = 1; 2648 } else { 2649 destElem = srcElem1; 2650 } 2651 } 2652 Fpscr = fpscr; 2653 ''' 2654 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 2655 smallSignedTypes, 
vqrshrnCode) 2656 2657 vqrshrunCode = ''' 2658 FPSCR fpscr = (FPSCR)Fpscr; 2659 if (imm > sizeof(srcElem1) * 8) { 2660 if (srcElem1 != 0) 2661 fpscr.qc = 1; 2662 destElem = 0; 2663 } else if (imm) { 2664 BigElement mid = (srcElem1 >> (imm - 1)); 2665 uint64_t rBit = mid & 0x1; 2666 mid >>= 1; 2667 mid += rBit; 2668 if (mid != (Element)mid) { 2669 destElem = mask(sizeof(Element) * 8); 2670 fpscr.qc = 1; 2671 } else { 2672 destElem = mid; 2673 } 2674 } else { 2675 if (srcElem1 != (Element)srcElem1) { 2676 destElem = mask(sizeof(Element) * 8 - 1); 2677 fpscr.qc = 1; 2678 } else { 2679 destElem = srcElem1; 2680 } 2681 } 2682 Fpscr = fpscr; 2683 ''' 2684 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 2685 smallUnsignedTypes, vqrshrunCode) 2686 2687 vqrshrunsCode = ''' 2688 FPSCR fpscr = (FPSCR)Fpscr; 2689 if (imm > sizeof(srcElem1) * 8) { 2690 if (srcElem1 != 0) 2691 fpscr.qc = 1; 2692 destElem = 0; 2693 } else if (imm) { 2694 BigElement mid = (srcElem1 >> (imm - 1)); 2695 uint64_t rBit = mid & 0x1; 2696 mid >>= 1; 2697 mid |= -(mid & ((BigElement)1 << 2698 (sizeof(BigElement) * 8 - 1 - imm))); 2699 mid += rBit; 2700 if (bits(mid, sizeof(BigElement) * 8 - 1, 2701 sizeof(Element) * 8) != 0) { 2702 if (srcElem1 < 0) { 2703 destElem = 0; 2704 } else { 2705 destElem = mask(sizeof(Element) * 8); 2706 } 2707 fpscr.qc = 1; 2708 } else { 2709 destElem = mid; 2710 } 2711 } else { 2712 if (srcElem1 < 0) { 2713 fpscr.qc = 1; 2714 destElem = 0; 2715 } else { 2716 destElem = srcElem1; 2717 } 2718 } 2719 Fpscr = fpscr; 2720 ''' 2721 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 2722 smallSignedTypes, vqrshrunsCode) 2723 2724 vshllCode = ''' 2725 if (imm >= sizeof(destElem) * 8) { 2726 destElem = 0; 2727 } else { 2728 destElem = (BigElement)srcElem1 << imm; 2729 } 2730 ''' 2731 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode) 2732 2733 vmovlCode = ''' 2734 destElem = srcElem1; 2735 ''' 2736 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode) 2737 2738 
vcvt2ufxCode = ''' 2739 FPSCR fpscr = Fpscr; 2740 if (flushToZero(srcElem1)) 2741 fpscr.idc = 1; 2742 VfpSavedState state = prepFpState(VfpRoundNearest); 2743 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2744 destReg = vfpFpSToFixed(srcElem1, false, false, imm); 2745 __asm__ __volatile__("" :: "m" (destReg)); 2746 finishVfp(fpscr, state, true); 2747 Fpscr = fpscr; 2748 ''' 2749 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",), 2750 2, vcvt2ufxCode, toInt = True) 2751 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",), 2752 4, vcvt2ufxCode, toInt = True) 2753 2754 vcvt2sfxCode = ''' 2755 FPSCR fpscr = Fpscr; 2756 if (flushToZero(srcElem1)) 2757 fpscr.idc = 1; 2758 VfpSavedState state = prepFpState(VfpRoundNearest); 2759 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2760 destReg = vfpFpSToFixed(srcElem1, true, false, imm); 2761 __asm__ __volatile__("" :: "m" (destReg)); 2762 finishVfp(fpscr, state, true); 2763 Fpscr = fpscr; 2764 ''' 2765 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",), 2766 2, vcvt2sfxCode, toInt = True) 2767 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",), 2768 4, vcvt2sfxCode, toInt = True) 2769 2770 vcvtu2fpCode = ''' 2771 FPSCR fpscr = Fpscr; 2772 VfpSavedState state = prepFpState(VfpRoundNearest); 2773 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2774 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); 2775 __asm__ __volatile__("" :: "m" (destElem)); 2776 finishVfp(fpscr, state, true); 2777 Fpscr = fpscr; 2778 ''' 2779 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",), 2780 2, vcvtu2fpCode, fromInt = True) 2781 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",), 2782 4, vcvtu2fpCode, fromInt = True) 2783 2784 vcvts2fpCode = ''' 2785 FPSCR fpscr = Fpscr; 2786 VfpSavedState state = prepFpState(VfpRoundNearest); 2787 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2788 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); 2789 __asm__ __volatile__("" :: "m" (destElem)); 2790 
finishVfp(fpscr, state, true); 2791 Fpscr = fpscr; 2792 ''' 2793 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",), 2794 2, vcvts2fpCode, fromInt = True) 2795 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",), 2796 4, vcvts2fpCode, fromInt = True) 2797 2798 vcvts2hCode = ''' 2799 FPSCR fpscr = Fpscr; 2800 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 2801 if (flushToZero(srcFp1)) 2802 fpscr.idc = 1; 2803 VfpSavedState state = prepFpState(VfpRoundNearest); 2804 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 2805 : "m" (srcFp1), "m" (destElem)); 2806 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 2807 fpscr.ahp, srcFp1); 2808 __asm__ __volatile__("" :: "m" (destElem)); 2809 finishVfp(fpscr, state, true); 2810 Fpscr = fpscr; 2811 ''' 2812 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode) 2813 2814 vcvth2sCode = ''' 2815 FPSCR fpscr = Fpscr; 2816 VfpSavedState state = prepFpState(VfpRoundNearest); 2817 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 2818 : "m" (srcElem1), "m" (destElem)); 2819 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 2820 __asm__ __volatile__("" :: "m" (destElem)); 2821 finishVfp(fpscr, state, true); 2822 Fpscr = fpscr; 2823 ''' 2824 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode) 2825 2826 vrsqrteCode = ''' 2827 destElem = unsignedRSqrtEstimate(srcElem1); 2828 ''' 2829 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode) 2830 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode) 2831 2832 vrsqrtefpCode = ''' 2833 FPSCR fpscr = Fpscr; 2834 if (flushToZero(srcReg1)) 2835 fpscr.idc = 1; 2836 destReg = fprSqrtEstimate(fpscr, srcReg1); 2837 Fpscr = fpscr; 2838 ''' 2839 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode) 2840 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode) 2841 2842 vrecpeCode = ''' 2843 destElem = unsignedRecipEstimate(srcElem1); 2844 ''' 2845 twoRegMiscInst("vrecpe", 
"NVrecpeD", ("uint32_t",), 2, vrecpeCode) 2846 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode) 2847 2848 vrecpefpCode = ''' 2849 FPSCR fpscr = Fpscr; 2850 if (flushToZero(srcReg1)) 2851 fpscr.idc = 1; 2852 destReg = fpRecipEstimate(fpscr, srcReg1); 2853 Fpscr = fpscr; 2854 ''' 2855 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode) 2856 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode) 2857 2858 vrev16Code = ''' 2859 destElem = srcElem1; 2860 unsigned groupSize = ((1 << 1) / sizeof(Element)); 2861 unsigned reverseMask = (groupSize - 1); 2862 j = i ^ reverseMask; 2863 ''' 2864 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code) 2865 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code) 2866 vrev32Code = ''' 2867 destElem = srcElem1; 2868 unsigned groupSize = ((1 << 2) / sizeof(Element)); 2869 unsigned reverseMask = (groupSize - 1); 2870 j = i ^ reverseMask; 2871 ''' 2872 twoRegMiscInst("vrev32", "NVrev32D", 2873 ("uint8_t", "uint16_t"), 2, vrev32Code) 2874 twoRegMiscInst("vrev32", "NVrev32Q", 2875 ("uint8_t", "uint16_t"), 4, vrev32Code) 2876 vrev64Code = ''' 2877 destElem = srcElem1; 2878 unsigned groupSize = ((1 << 3) / sizeof(Element)); 2879 unsigned reverseMask = (groupSize - 1); 2880 j = i ^ reverseMask; 2881 ''' 2882 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code) 2883 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code) 2884 2885 vpaddlCode = ''' 2886 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2887 ''' 2888 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode) 2889 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode) 2890 2891 vpadalCode = ''' 2892 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 2893 ''' 2894 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True) 2895 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True) 2896 2897 vclsCode = ''' 
2898 unsigned count = 0; 2899 if (srcElem1 < 0) { 2900 srcElem1 <<= 1; 2901 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 2902 count++; 2903 srcElem1 <<= 1; 2904 } 2905 } else { 2906 srcElem1 <<= 1; 2907 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 2908 count++; 2909 srcElem1 <<= 1; 2910 } 2911 } 2912 destElem = count; 2913 ''' 2914 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode) 2915 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode) 2916 2917 vclzCode = ''' 2918 unsigned count = 0; 2919 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 2920 count++; 2921 srcElem1 <<= 1; 2922 } 2923 destElem = count; 2924 ''' 2925 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode) 2926 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode) 2927 2928 vcntCode = ''' 2929 unsigned count = 0; 2930 while (srcElem1 && count < sizeof(Element) * 8) { 2931 count += srcElem1 & 0x1; 2932 srcElem1 >>= 1; 2933 } 2934 destElem = count; 2935 ''' 2936 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode) 2937 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode) 2938 2939 vmvnCode = ''' 2940 destElem = ~srcElem1; 2941 ''' 2942 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode) 2943 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode) 2944 2945 vqabsCode = ''' 2946 FPSCR fpscr = (FPSCR)Fpscr; 2947 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2948 fpscr.qc = 1; 2949 destElem = ~srcElem1; 2950 } else if (srcElem1 < 0) { 2951 destElem = -srcElem1; 2952 } else { 2953 destElem = srcElem1; 2954 } 2955 Fpscr = fpscr; 2956 ''' 2957 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode) 2958 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode) 2959 2960 vqnegCode = ''' 2961 FPSCR fpscr = (FPSCR)Fpscr; 2962 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2963 fpscr.qc = 1; 2964 destElem = ~srcElem1; 2965 } else { 2966 destElem = -srcElem1; 2967 } 2968 
Fpscr = fpscr; 2969 ''' 2970 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode) 2971 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode) 2972 2973 vabsCode = ''' 2974 if (srcElem1 < 0) { 2975 destElem = -srcElem1; 2976 } else { 2977 destElem = srcElem1; 2978 } 2979 ''' 2980 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode) 2981 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode) 2982 vabsfpCode = ''' 2983 union 2984 { 2985 uint32_t i; 2986 float f; 2987 } cStruct; 2988 cStruct.f = srcReg1; 2989 cStruct.i &= mask(sizeof(Element) * 8 - 1); 2990 destReg = cStruct.f; 2991 ''' 2992 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode) 2993 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode) 2994 2995 vnegCode = ''' 2996 destElem = -srcElem1; 2997 ''' 2998 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode) 2999 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode) 3000 vnegfpCode = ''' 3001 destReg = -srcReg1; 3002 ''' 3003 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode) 3004 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode) 3005 3006 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3007 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode) 3008 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode) 3009 vcgtfpCode = ''' 3010 FPSCR fpscr = (FPSCR)Fpscr; 3011 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, 3012 true, true, VfpRoundNearest); 3013 destReg = (res == 0) ? -1 : 0; 3014 if (res == 2.0) 3015 fpscr.ioc = 1; 3016 Fpscr = fpscr; 3017 ''' 3018 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",), 3019 2, vcgtfpCode, toInt = True) 3020 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",), 3021 4, vcgtfpCode, toInt = True) 3022 3023 vcgeCode = 'destElem = (srcElem1 >= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3024 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode) 3025 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode) 3026 vcgefpCode = ''' 3027 FPSCR fpscr = (FPSCR)Fpscr; 3028 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, 3029 true, true, VfpRoundNearest); 3030 destReg = (res == 0) ? -1 : 0; 3031 if (res == 2.0) 3032 fpscr.ioc = 1; 3033 Fpscr = fpscr; 3034 ''' 3035 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",), 3036 2, vcgefpCode, toInt = True) 3037 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",), 3038 4, vcgefpCode, toInt = True) 3039 3040 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3041 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode) 3042 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode) 3043 vceqfpCode = ''' 3044 FPSCR fpscr = (FPSCR)Fpscr; 3045 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, 3046 true, true, VfpRoundNearest); 3047 destReg = (res == 0) ? -1 : 0; 3048 if (res == 2.0) 3049 fpscr.ioc = 1; 3050 Fpscr = fpscr; 3051 ''' 3052 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",), 3053 2, vceqfpCode, toInt = True) 3054 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",), 3055 4, vceqfpCode, toInt = True) 3056 3057 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;' 3058 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode) 3059 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode) 3060 vclefpCode = ''' 3061 FPSCR fpscr = (FPSCR)Fpscr; 3062 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, 3063 true, true, VfpRoundNearest); 3064 destReg = (res == 0) ? -1 : 0; 3065 if (res == 2.0) 3066 fpscr.ioc = 1; 3067 Fpscr = fpscr; 3068 ''' 3069 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",), 3070 2, vclefpCode, toInt = True) 3071 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",), 3072 4, vclefpCode, toInt = True) 3073 3074 vcltCode = 'destElem = (srcElem1 < 0) ? 
mask(sizeof(Element) * 8) : 0;' 3075 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode) 3076 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode) 3077 vcltfpCode = ''' 3078 FPSCR fpscr = (FPSCR)Fpscr; 3079 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, 3080 true, true, VfpRoundNearest); 3081 destReg = (res == 0) ? -1 : 0; 3082 if (res == 2.0) 3083 fpscr.ioc = 1; 3084 Fpscr = fpscr; 3085 ''' 3086 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",), 3087 2, vcltfpCode, toInt = True) 3088 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",), 3089 4, vcltfpCode, toInt = True) 3090 3091 vswpCode = ''' 3092 FloatRegBits mid; 3093 for (unsigned r = 0; r < rCount; r++) { 3094 mid = srcReg1.regs[r]; 3095 srcReg1.regs[r] = destReg.regs[r]; 3096 destReg.regs[r] = mid; 3097 } 3098 ''' 3099 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode) 3100 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode) 3101 3102 vtrnCode = ''' 3103 Element mid; 3104 for (unsigned i = 0; i < eCount; i += 2) { 3105 mid = srcReg1.elements[i]; 3106 srcReg1.elements[i] = destReg.elements[i + 1]; 3107 destReg.elements[i + 1] = mid; 3108 } 3109 ''' 3110 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode) 3111 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode) 3112 3113 vuzpCode = ''' 3114 Element mid[eCount]; 3115 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3116 for (unsigned i = 0; i < eCount / 2; i++) { 3117 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3118 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3119 destReg.elements[i] = destReg.elements[2 * i]; 3120 } 3121 for (unsigned i = 0; i < eCount / 2; i++) { 3122 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3123 } 3124 ''' 3125 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode) 3126 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode) 3127 3128 vzipCode = ''' 3129 Element mid[eCount]; 3130 memcpy(&mid, &destReg, sizeof(destReg)); 3131 for 
(unsigned i = 0; i < eCount / 2; i++) { 3132 destReg.elements[2 * i] = mid[i]; 3133 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3134 } 3135 for (int i = 0; i < eCount / 2; i++) { 3136 srcReg1.elements[2 * i] = mid[eCount / 2 + i]; 3137 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3138 } 3139 ''' 3140 twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode) 3141 twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode) 3142 3143 vmovnCode = 'destElem = srcElem1;' 3144 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode) 3145 3146 vdupCode = 'destElem = srcElem1;' 3147 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode) 3148 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode) 3149 3150 def vdupGprInst(name, Name, types, rCount): 3151 global header_output, exec_output 3152 eWalkCode = ''' 3153 RegVect destReg; 3154 for (unsigned i = 0; i < eCount; i++) { 3155 destReg.elements[i] = htog((Element)Op1); 3156 } 3157 ''' 3158 for reg in range(rCount): 3159 eWalkCode += ''' 3160 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3161 ''' % { "reg" : reg } 3162 iop = InstObjParams(name, Name, 3163 "RegRegOp", 3164 { "code": eWalkCode, 3165 "r_count": rCount, 3166 "predicate_test": predicateTest }, []) 3167 header_output += NeonRegRegOpDeclare.subst(iop) 3168 exec_output += NeonEqualRegExecute.subst(iop) 3169 for type in types: 3170 substDict = { "targs" : type, 3171 "class_name" : Name } 3172 exec_output += NeonExecDeclare.subst(substDict) 3173 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2) 3174 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4) 3175 3176 vmovCode = 'destElem = imm;' 3177 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode) 3178 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode) 3179 3180 vorrCode = 'destElem |= imm;' 3181 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True) 3182 oneRegImmInst("vorr", "NVorriQ", 
("uint64_t",), 4, vorrCode, True) 3183 3184 vmvnCode = 'destElem = ~imm;' 3185 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode) 3186 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode) 3187 3188 vbicCode = 'destElem &= ~imm;' 3189 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True) 3190 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True) 3191 3192 vqmovnCode = ''' 3193 FPSCR fpscr = (FPSCR)Fpscr; 3194 destElem = srcElem1; 3195 if ((BigElement)destElem != srcElem1) { 3196 fpscr.qc = 1; 3197 destElem = mask(sizeof(Element) * 8 - 1); 3198 if (srcElem1 < 0) 3199 destElem = ~destElem; 3200 } 3201 Fpscr = fpscr; 3202 ''' 3203 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode) 3204 3205 vqmovunCode = ''' 3206 FPSCR fpscr = (FPSCR)Fpscr; 3207 destElem = srcElem1; 3208 if ((BigElement)destElem != srcElem1) { 3209 fpscr.qc = 1; 3210 destElem = mask(sizeof(Element) * 8); 3211 } 3212 Fpscr = fpscr; 3213 ''' 3214 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3215 smallUnsignedTypes, vqmovunCode) 3216 3217 vqmovunsCode = ''' 3218 FPSCR fpscr = (FPSCR)Fpscr; 3219 destElem = srcElem1; 3220 if (srcElem1 < 0 || 3221 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3222 fpscr.qc = 1; 3223 destElem = mask(sizeof(Element) * 8); 3224 if (srcElem1 < 0) 3225 destElem = ~destElem; 3226 } 3227 Fpscr = fpscr; 3228 ''' 3229 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3230 smallSignedTypes, vqmovunsCode) 3231 3232 def buildVext(name, Name, types, rCount, op): 3233 global header_output, exec_output 3234 eWalkCode = ''' 3235 RegVect srcReg1, srcReg2, destReg; 3236 ''' 3237 for reg in range(rCount): 3238 eWalkCode += ''' 3239 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 3240 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 3241 ''' % { "reg" : reg } 3242 eWalkCode += op 3243 for reg in range(rCount): 3244 eWalkCode += ''' 3245 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3246 ''' % { "reg" : reg } 3247 
iop = InstObjParams(name, Name, 3248 "RegRegRegImmOp", 3249 { "code": eWalkCode, 3250 "r_count": rCount, 3251 "predicate_test": predicateTest }, []) 3252 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3253 exec_output += NeonEqualRegExecute.subst(iop) 3254 for type in types: 3255 substDict = { "targs" : type, 3256 "class_name" : Name } 3257 exec_output += NeonExecDeclare.subst(substDict) 3258 3259 vextCode = ''' 3260 for (unsigned i = 0; i < eCount; i++) { 3261 unsigned index = i + imm; 3262 if (index < eCount) { 3263 destReg.elements[i] = srcReg1.elements[index]; 3264 } else { 3265 index -= eCount; 3266 assert(index < eCount); 3267 destReg.elements[i] = srcReg2.elements[index]; 3268 } 3269 } 3270 ''' 3271 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode) 3272 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode) 3273 3274 def buildVtbxl(name, Name, length, isVtbl): 3275 global header_output, decoder_output, exec_output 3276 code = ''' 3277 union 3278 { 3279 uint8_t bytes[32]; 3280 FloatRegBits regs[8]; 3281 } table; 3282 3283 union 3284 { 3285 uint8_t bytes[8]; 3286 FloatRegBits regs[2]; 3287 } destReg, srcReg2; 3288 3289 const unsigned length = %(length)d; 3290 const bool isVtbl = %(isVtbl)s; 3291 3292 srcReg2.regs[0] = htog(FpOp2P0.uw); 3293 srcReg2.regs[1] = htog(FpOp2P1.uw); 3294 3295 destReg.regs[0] = htog(FpDestP0.uw); 3296 destReg.regs[1] = htog(FpDestP1.uw); 3297 ''' % { "length" : length, "isVtbl" : isVtbl } 3298 for reg in range(8): 3299 if reg < length * 2: 3300 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \ 3301 { "reg" : reg } 3302 else: 3303 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3304 code += ''' 3305 for (unsigned i = 0; i < sizeof(destReg); i++) { 3306 uint8_t index = srcReg2.bytes[i]; 3307 if (index < 8 * length) { 3308 destReg.bytes[i] = table.bytes[index]; 3309 } else { 3310 if (isVtbl) 3311 destReg.bytes[i] = 0; 3312 // else destReg.bytes[i] unchanged 3313 } 3314 } 3315 3316 FpDestP0.uw = 
gtoh(destReg.regs[0]); 3317 FpDestP1.uw = gtoh(destReg.regs[1]); 3318 ''' 3319 iop = InstObjParams(name, Name, 3320 "RegRegRegOp", 3321 { "code": code, 3322 "predicate_test": predicateTest }, []) 3323 header_output += RegRegRegOpDeclare.subst(iop) 3324 decoder_output += RegRegRegOpConstructor.subst(iop) 3325 exec_output += PredOpExecute.subst(iop) 3326 3327 buildVtbxl("vtbl", "NVtbl1", 1, "true") 3328 buildVtbxl("vtbl", "NVtbl2", 2, "true") 3329 buildVtbxl("vtbl", "NVtbl3", 3, "true") 3330 buildVtbxl("vtbl", "NVtbl4", 4, "true") 3331 3332 buildVtbxl("vtbx", "NVtbx1", 1, "false") 3333 buildVtbxl("vtbx", "NVtbx2", 2, "false") 3334 buildVtbxl("vtbx", "NVtbx3", 3, "false") 3335 buildVtbxl("vtbx", "NVtbx4", 4, "false") 3336}}; 3337