neon.isa revision 7640
// -*- mode:c++ -*-

// Copyright (c) 2010 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Gabe Black 39 40output header {{ 41 template <template <typename T> class Base> 42 StaticInstPtr 43 decodeNeonUThreeUReg(unsigned size, 44 ExtMachInst machInst, IntRegIndex dest, 45 IntRegIndex op1, IntRegIndex op2) 46 { 47 switch (size) { 48 case 0: 49 return new Base<uint8_t>(machInst, dest, op1, op2); 50 case 1: 51 return new Base<uint16_t>(machInst, dest, op1, op2); 52 case 2: 53 return new Base<uint32_t>(machInst, dest, op1, op2); 54 case 3: 55 return new Base<uint64_t>(machInst, dest, op1, op2); 56 default: 57 return new Unknown(machInst); 58 } 59 } 60 61 template <template <typename T> class Base> 62 StaticInstPtr 63 decodeNeonSThreeUReg(unsigned size, 64 ExtMachInst machInst, IntRegIndex dest, 65 IntRegIndex op1, IntRegIndex op2) 66 { 67 switch (size) { 68 case 0: 69 return new Base<int8_t>(machInst, dest, op1, op2); 70 case 1: 71 return new Base<int16_t>(machInst, dest, op1, op2); 72 case 2: 73 return new Base<int32_t>(machInst, dest, op1, op2); 74 case 3: 75 return new Base<int64_t>(machInst, dest, op1, op2); 76 default: 77 return new Unknown(machInst); 78 } 79 } 80 81 template <template <typename T> class Base> 82 StaticInstPtr 83 decodeNeonUSThreeUReg(bool notSigned, unsigned size, 84 ExtMachInst machInst, IntRegIndex dest, 85 IntRegIndex op1, IntRegIndex op2) 86 { 87 if (notSigned) { 88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); 89 } else { 90 return 
decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); 91 } 92 } 93 94 template <template <typename T> class Base> 95 StaticInstPtr 96 decodeNeonUThreeUSReg(unsigned size, 97 ExtMachInst machInst, IntRegIndex dest, 98 IntRegIndex op1, IntRegIndex op2) 99 { 100 switch (size) { 101 case 0: 102 return new Base<uint8_t>(machInst, dest, op1, op2); 103 case 1: 104 return new Base<uint16_t>(machInst, dest, op1, op2); 105 case 2: 106 return new Base<uint32_t>(machInst, dest, op1, op2); 107 default: 108 return new Unknown(machInst); 109 } 110 } 111 112 template <template <typename T> class Base> 113 StaticInstPtr 114 decodeNeonSThreeUSReg(unsigned size, 115 ExtMachInst machInst, IntRegIndex dest, 116 IntRegIndex op1, IntRegIndex op2) 117 { 118 switch (size) { 119 case 0: 120 return new Base<int8_t>(machInst, dest, op1, op2); 121 case 1: 122 return new Base<int16_t>(machInst, dest, op1, op2); 123 case 2: 124 return new Base<int32_t>(machInst, dest, op1, op2); 125 default: 126 return new Unknown(machInst); 127 } 128 } 129 130 template <template <typename T> class Base> 131 StaticInstPtr 132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size, 133 ExtMachInst machInst, IntRegIndex dest, 134 IntRegIndex op1, IntRegIndex op2) 135 { 136 if (notSigned) { 137 return decodeNeonUThreeUSReg<Base>( 138 size, machInst, dest, op1, op2); 139 } else { 140 return decodeNeonSThreeUSReg<Base>( 141 size, machInst, dest, op1, op2); 142 } 143 } 144 145 template <template <typename T> class BaseD, 146 template <typename T> class BaseQ> 147 StaticInstPtr 148 decodeNeonUThreeSReg(bool q, unsigned size, 149 ExtMachInst machInst, IntRegIndex dest, 150 IntRegIndex op1, IntRegIndex op2) 151 { 152 if (q) { 153 return decodeNeonUThreeUSReg<BaseQ>( 154 size, machInst, dest, op1, op2); 155 } else { 156 return decodeNeonUThreeUSReg<BaseD>( 157 size, machInst, dest, op1, op2); 158 } 159 } 160 161 template <template <typename T> class BaseD, 162 template <typename T> class BaseQ> 163 StaticInstPtr 164 
decodeNeonSThreeSReg(bool q, unsigned size, 165 ExtMachInst machInst, IntRegIndex dest, 166 IntRegIndex op1, IntRegIndex op2) 167 { 168 if (q) { 169 return decodeNeonSThreeUSReg<BaseQ>( 170 size, machInst, dest, op1, op2); 171 } else { 172 return decodeNeonSThreeUSReg<BaseD>( 173 size, machInst, dest, op1, op2); 174 } 175 } 176 177 template <template <typename T> class BaseD, 178 template <typename T> class BaseQ> 179 StaticInstPtr 180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, 181 ExtMachInst machInst, IntRegIndex dest, 182 IntRegIndex op1, IntRegIndex op2) 183 { 184 if (notSigned) { 185 return decodeNeonUThreeSReg<BaseD, BaseQ>( 186 q, size, machInst, dest, op1, op2); 187 } else { 188 return decodeNeonSThreeSReg<BaseD, BaseQ>( 189 q, size, machInst, dest, op1, op2); 190 } 191 } 192 193 template <template <typename T> class BaseD, 194 template <typename T> class BaseQ> 195 StaticInstPtr 196 decodeNeonUThreeReg(bool q, unsigned size, 197 ExtMachInst machInst, IntRegIndex dest, 198 IntRegIndex op1, IntRegIndex op2) 199 { 200 if (q) { 201 return decodeNeonUThreeUReg<BaseQ>( 202 size, machInst, dest, op1, op2); 203 } else { 204 return decodeNeonUThreeUReg<BaseD>( 205 size, machInst, dest, op1, op2); 206 } 207 } 208 209 template <template <typename T> class BaseD, 210 template <typename T> class BaseQ> 211 StaticInstPtr 212 decodeNeonSThreeReg(bool q, unsigned size, 213 ExtMachInst machInst, IntRegIndex dest, 214 IntRegIndex op1, IntRegIndex op2) 215 { 216 if (q) { 217 return decodeNeonSThreeUReg<BaseQ>( 218 size, machInst, dest, op1, op2); 219 } else { 220 return decodeNeonSThreeUReg<BaseD>( 221 size, machInst, dest, op1, op2); 222 } 223 } 224 225 template <template <typename T> class BaseD, 226 template <typename T> class BaseQ> 227 StaticInstPtr 228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, 229 ExtMachInst machInst, IntRegIndex dest, 230 IntRegIndex op1, IntRegIndex op2) 231 { 232 if (notSigned) { 233 return 
decodeNeonUThreeReg<BaseD, BaseQ>( 234 q, size, machInst, dest, op1, op2); 235 } else { 236 return decodeNeonSThreeReg<BaseD, BaseQ>( 237 q, size, machInst, dest, op1, op2); 238 } 239 } 240 241 template <template <typename T> class BaseD, 242 template <typename T> class BaseQ> 243 StaticInstPtr 244 decodeNeonUTwoShiftReg(bool q, unsigned size, 245 ExtMachInst machInst, IntRegIndex dest, 246 IntRegIndex op1, uint64_t imm) 247 { 248 if (q) { 249 switch (size) { 250 case 0: 251 return new BaseQ<uint8_t>(machInst, dest, op1, imm); 252 case 1: 253 return new BaseQ<uint16_t>(machInst, dest, op1, imm); 254 case 2: 255 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 256 case 3: 257 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 258 default: 259 return new Unknown(machInst); 260 } 261 } else { 262 switch (size) { 263 case 0: 264 return new BaseD<uint8_t>(machInst, dest, op1, imm); 265 case 1: 266 return new BaseD<uint16_t>(machInst, dest, op1, imm); 267 case 2: 268 return new BaseD<uint32_t>(machInst, dest, op1, imm); 269 case 3: 270 return new BaseD<uint64_t>(machInst, dest, op1, imm); 271 default: 272 return new Unknown(machInst); 273 } 274 } 275 } 276 277 template <template <typename T> class BaseD, 278 template <typename T> class BaseQ> 279 StaticInstPtr 280 decodeNeonSTwoShiftReg(bool q, unsigned size, 281 ExtMachInst machInst, IntRegIndex dest, 282 IntRegIndex op1, uint64_t imm) 283 { 284 if (q) { 285 switch (size) { 286 case 0: 287 return new BaseQ<int8_t>(machInst, dest, op1, imm); 288 case 1: 289 return new BaseQ<int16_t>(machInst, dest, op1, imm); 290 case 2: 291 return new BaseQ<int32_t>(machInst, dest, op1, imm); 292 case 3: 293 return new BaseQ<int64_t>(machInst, dest, op1, imm); 294 default: 295 return new Unknown(machInst); 296 } 297 } else { 298 switch (size) { 299 case 0: 300 return new BaseD<int8_t>(machInst, dest, op1, imm); 301 case 1: 302 return new BaseD<int16_t>(machInst, dest, op1, imm); 303 case 2: 304 return new 
BaseD<int32_t>(machInst, dest, op1, imm); 305 case 3: 306 return new BaseD<int64_t>(machInst, dest, op1, imm); 307 default: 308 return new Unknown(machInst); 309 } 310 } 311 } 312 313 314 template <template <typename T> class BaseD, 315 template <typename T> class BaseQ> 316 StaticInstPtr 317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, 318 ExtMachInst machInst, IntRegIndex dest, 319 IntRegIndex op1, uint64_t imm) 320 { 321 if (notSigned) { 322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>( 323 q, size, machInst, dest, op1, imm); 324 } else { 325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>( 326 q, size, machInst, dest, op1, imm); 327 } 328 } 329 330 template <template <typename T> class Base> 331 StaticInstPtr 332 decodeNeonUTwoShiftUSReg(unsigned size, 333 ExtMachInst machInst, IntRegIndex dest, 334 IntRegIndex op1, uint64_t imm) 335 { 336 switch (size) { 337 case 0: 338 return new Base<uint8_t>(machInst, dest, op1, imm); 339 case 1: 340 return new Base<uint16_t>(machInst, dest, op1, imm); 341 case 2: 342 return new Base<uint32_t>(machInst, dest, op1, imm); 343 default: 344 return new Unknown(machInst); 345 } 346 } 347 348 template <template <typename T> class BaseD, 349 template <typename T> class BaseQ> 350 StaticInstPtr 351 decodeNeonUTwoShiftSReg(bool q, unsigned size, 352 ExtMachInst machInst, IntRegIndex dest, 353 IntRegIndex op1, uint64_t imm) 354 { 355 if (q) { 356 return decodeNeonUTwoShiftUSReg<BaseQ>( 357 size, machInst, dest, op1, imm); 358 } else { 359 return decodeNeonUTwoShiftUSReg<BaseD>( 360 size, machInst, dest, op1, imm); 361 } 362 } 363 364 template <template <typename T> class Base> 365 StaticInstPtr 366 decodeNeonSTwoShiftUSReg(unsigned size, 367 ExtMachInst machInst, IntRegIndex dest, 368 IntRegIndex op1, uint64_t imm) 369 { 370 switch (size) { 371 case 0: 372 return new Base<int8_t>(machInst, dest, op1, imm); 373 case 1: 374 return new Base<int16_t>(machInst, dest, op1, imm); 375 case 2: 376 return new 
Base<int32_t>(machInst, dest, op1, imm); 377 default: 378 return new Unknown(machInst); 379 } 380 } 381 382 template <template <typename T> class BaseD, 383 template <typename T> class BaseQ> 384 StaticInstPtr 385 decodeNeonSTwoShiftSReg(bool q, unsigned size, 386 ExtMachInst machInst, IntRegIndex dest, 387 IntRegIndex op1, uint64_t imm) 388 { 389 if (q) { 390 return decodeNeonSTwoShiftUSReg<BaseQ>( 391 size, machInst, dest, op1, imm); 392 } else { 393 return decodeNeonSTwoShiftUSReg<BaseD>( 394 size, machInst, dest, op1, imm); 395 } 396 } 397 398 template <template <typename T> class BaseD, 399 template <typename T> class BaseQ> 400 StaticInstPtr 401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, 402 ExtMachInst machInst, IntRegIndex dest, 403 IntRegIndex op1, uint64_t imm) 404 { 405 if (notSigned) { 406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 407 q, size, machInst, dest, op1, imm); 408 } else { 409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 410 q, size, machInst, dest, op1, imm); 411 } 412 } 413 414 template <template <typename T> class Base> 415 StaticInstPtr 416 decodeNeonUTwoMiscUSReg(unsigned size, 417 ExtMachInst machInst, IntRegIndex dest, 418 IntRegIndex op1) 419 { 420 switch (size) { 421 case 0: 422 return new Base<uint8_t>(machInst, dest, op1); 423 case 1: 424 return new Base<uint16_t>(machInst, dest, op1); 425 case 2: 426 return new Base<uint32_t>(machInst, dest, op1); 427 default: 428 return new Unknown(machInst); 429 } 430 } 431 432 template <template <typename T> class Base> 433 StaticInstPtr 434 decodeNeonSTwoMiscUSReg(unsigned size, 435 ExtMachInst machInst, IntRegIndex dest, 436 IntRegIndex op1) 437 { 438 switch (size) { 439 case 0: 440 return new Base<int8_t>(machInst, dest, op1); 441 case 1: 442 return new Base<int16_t>(machInst, dest, op1); 443 case 2: 444 return new Base<int32_t>(machInst, dest, op1); 445 default: 446 return new Unknown(machInst); 447 } 448 } 449 450 template <template <typename T> class BaseD, 451 
template <typename T> class BaseQ> 452 StaticInstPtr 453 decodeNeonUTwoMiscSReg(bool q, unsigned size, 454 ExtMachInst machInst, IntRegIndex dest, 455 IntRegIndex op1) 456 { 457 if (q) { 458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 459 } else { 460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 461 } 462 } 463 464 template <template <typename T> class BaseD, 465 template <typename T> class BaseQ> 466 StaticInstPtr 467 decodeNeonSTwoMiscSReg(bool q, unsigned size, 468 ExtMachInst machInst, IntRegIndex dest, 469 IntRegIndex op1) 470 { 471 if (q) { 472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 473 } else { 474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 475 } 476 } 477 478 template <template <typename T> class Base> 479 StaticInstPtr 480 decodeNeonUTwoMiscUReg(unsigned size, 481 ExtMachInst machInst, IntRegIndex dest, 482 IntRegIndex op1) 483 { 484 switch (size) { 485 case 0: 486 return new Base<uint8_t>(machInst, dest, op1); 487 case 1: 488 return new Base<uint16_t>(machInst, dest, op1); 489 case 2: 490 return new Base<uint32_t>(machInst, dest, op1); 491 case 3: 492 return new Base<uint64_t>(machInst, dest, op1); 493 default: 494 return new Unknown(machInst); 495 } 496 } 497 498 template <template <typename T> class Base> 499 StaticInstPtr 500 decodeNeonSTwoMiscUReg(unsigned size, 501 ExtMachInst machInst, IntRegIndex dest, 502 IntRegIndex op1) 503 { 504 switch (size) { 505 case 0: 506 return new Base<int8_t>(machInst, dest, op1); 507 case 1: 508 return new Base<int16_t>(machInst, dest, op1); 509 case 2: 510 return new Base<int32_t>(machInst, dest, op1); 511 case 3: 512 return new Base<int64_t>(machInst, dest, op1); 513 default: 514 return new Unknown(machInst); 515 } 516 } 517 518 template <template <typename T> class BaseD, 519 template <typename T> class BaseQ> 520 StaticInstPtr 521 decodeNeonSTwoMiscReg(bool q, unsigned size, 522 ExtMachInst machInst, IntRegIndex dest, 523 
IntRegIndex op1) 524 { 525 if (q) { 526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 527 } else { 528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); 529 } 530 } 531 532 template <template <typename T> class BaseD, 533 template <typename T> class BaseQ> 534 StaticInstPtr 535 decodeNeonUTwoMiscReg(bool q, unsigned size, 536 ExtMachInst machInst, IntRegIndex dest, 537 IntRegIndex op1) 538 { 539 if (q) { 540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 541 } else { 542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); 543 } 544 } 545 546 template <template <typename T> class BaseD, 547 template <typename T> class BaseQ> 548 StaticInstPtr 549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, 550 ExtMachInst machInst, IntRegIndex dest, 551 IntRegIndex op1) 552 { 553 if (notSigned) { 554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 555 q, size, machInst, dest, op1); 556 } else { 557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 558 q, size, machInst, dest, op1); 559 } 560 } 561 562}}; 563 564output exec {{ 565 static float 566 vcgtFunc(float op1, float op2) 567 { 568 if (isSnan(op1) || isSnan(op2)) 569 return 2.0; 570 return (op1 > op2) ? 0.0 : 1.0; 571 } 572 573 static float 574 vcgeFunc(float op1, float op2) 575 { 576 if (isSnan(op1) || isSnan(op2)) 577 return 2.0; 578 return (op1 >= op2) ? 0.0 : 1.0; 579 } 580 581 static float 582 vceqFunc(float op1, float op2) 583 { 584 if (isSnan(op1) || isSnan(op2)) 585 return 2.0; 586 return (op1 == op2) ? 0.0 : 1.0; 587 } 588 589 static float 590 vcleFunc(float op1, float op2) 591 { 592 if (isSnan(op1) || isSnan(op2)) 593 return 2.0; 594 return (op1 <= op2) ? 0.0 : 1.0; 595 } 596 597 static float 598 vcltFunc(float op1, float op2) 599 { 600 if (isSnan(op1) || isSnan(op2)) 601 return 2.0; 602 return (op1 < op2) ? 
0.0 : 1.0; 603 } 604 605 static float 606 vacgtFunc(float op1, float op2) 607 { 608 if (isSnan(op1) || isSnan(op2)) 609 return 2.0; 610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0; 611 } 612 613 static float 614 vacgeFunc(float op1, float op2) 615 { 616 if (isSnan(op1) || isSnan(op2)) 617 return 2.0; 618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0; 619 } 620}}; 621 622let {{ 623 simdEnabledCheckCode = ''' 624 if (!neonEnabled(Cpacr, Cpsr, Fpexc)) 625 return disabledFault(); 626 ''' 627}}; 628 629let {{ 630 631 header_output = "" 632 exec_output = "" 633 634 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t") 635 unsignedTypes = smallUnsignedTypes + ("uint64_t",) 636 smallSignedTypes = ("int8_t", "int16_t", "int32_t") 637 signedTypes = smallSignedTypes + ("int64_t",) 638 smallTypes = smallUnsignedTypes + smallSignedTypes 639 allTypes = unsignedTypes + signedTypes 640 641 def threeEqualRegInst(name, Name, types, rCount, op, 642 readDest=False, pairwise=False): 643 global header_output, exec_output 644 eWalkCode = simdEnabledCheckCode + ''' 645 RegVect srcReg1, srcReg2, destReg; 646 ''' 647 for reg in range(rCount): 648 eWalkCode += ''' 649 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 650 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 651 ''' % { "reg" : reg } 652 if readDest: 653 eWalkCode += ''' 654 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 655 ''' % { "reg" : reg } 656 readDestCode = '' 657 if readDest: 658 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 659 if pairwise: 660 eWalkCode += ''' 661 for (unsigned i = 0; i < eCount; i++) { 662 Element srcElem1 = gtoh(2 * i < eCount ? 663 srcReg1.elements[2 * i] : 664 srcReg2.elements[2 * i - eCount]); 665 Element srcElem2 = gtoh(2 * i < eCount ? 
666 srcReg1.elements[2 * i + 1] : 667 srcReg2.elements[2 * i + 1 - eCount]); 668 Element destElem; 669 %(readDest)s 670 %(op)s 671 destReg.elements[i] = htog(destElem); 672 } 673 ''' % { "op" : op, "readDest" : readDestCode } 674 else: 675 eWalkCode += ''' 676 for (unsigned i = 0; i < eCount; i++) { 677 Element srcElem1 = gtoh(srcReg1.elements[i]); 678 Element srcElem2 = gtoh(srcReg2.elements[i]); 679 Element destElem; 680 %(readDest)s 681 %(op)s 682 destReg.elements[i] = htog(destElem); 683 } 684 ''' % { "op" : op, "readDest" : readDestCode } 685 for reg in range(rCount): 686 eWalkCode += ''' 687 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 688 ''' % { "reg" : reg } 689 iop = InstObjParams(name, Name, 690 "RegRegRegOp", 691 { "code": eWalkCode, 692 "r_count": rCount, 693 "predicate_test": predicateTest }, []) 694 header_output += NeonRegRegRegOpDeclare.subst(iop) 695 exec_output += NeonEqualRegExecute.subst(iop) 696 for type in types: 697 substDict = { "targs" : type, 698 "class_name" : Name } 699 exec_output += NeonExecDeclare.subst(substDict) 700 701 def threeEqualRegInstFp(name, Name, types, rCount, op, 702 readDest=False, pairwise=False, toInt=False): 703 global header_output, exec_output 704 eWalkCode = simdEnabledCheckCode + ''' 705 typedef FloatReg FloatVect[rCount]; 706 FloatVect srcRegs1, srcRegs2; 707 ''' 708 if toInt: 709 eWalkCode += 'RegVect destRegs;\n' 710 else: 711 eWalkCode += 'FloatVect destRegs;\n' 712 for reg in range(rCount): 713 eWalkCode += ''' 714 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 715 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 716 ''' % { "reg" : reg } 717 if readDest: 718 if toInt: 719 eWalkCode += ''' 720 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 721 ''' % { "reg" : reg } 722 else: 723 eWalkCode += ''' 724 destRegs[%(reg)d] = FpDestP%(reg)d; 725 ''' % { "reg" : reg } 726 readDestCode = '' 727 if readDest: 728 readDestCode = 'destReg = destRegs[r];' 729 destType = 'FloatReg' 730 writeDest = 'destRegs[r] = destReg;' 731 if toInt: 732 
destType = 'FloatRegBits' 733 writeDest = 'destRegs.regs[r] = destReg;' 734 if pairwise: 735 eWalkCode += ''' 736 for (unsigned r = 0; r < rCount; r++) { 737 FloatReg srcReg1 = (2 * r < rCount) ? 738 srcRegs1[2 * r] : srcRegs2[2 * r - rCount]; 739 FloatReg srcReg2 = (2 * r < rCount) ? 740 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount]; 741 %(destType)s destReg; 742 %(readDest)s 743 %(op)s 744 %(writeDest)s 745 } 746 ''' % { "op" : op, 747 "readDest" : readDestCode, 748 "destType" : destType, 749 "writeDest" : writeDest } 750 else: 751 eWalkCode += ''' 752 for (unsigned r = 0; r < rCount; r++) { 753 FloatReg srcReg1 = srcRegs1[r]; 754 FloatReg srcReg2 = srcRegs2[r]; 755 %(destType)s destReg; 756 %(readDest)s 757 %(op)s 758 %(writeDest)s 759 } 760 ''' % { "op" : op, 761 "readDest" : readDestCode, 762 "destType" : destType, 763 "writeDest" : writeDest } 764 for reg in range(rCount): 765 if toInt: 766 eWalkCode += ''' 767 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d]; 768 ''' % { "reg" : reg } 769 else: 770 eWalkCode += ''' 771 FpDestP%(reg)d = destRegs[%(reg)d]; 772 ''' % { "reg" : reg } 773 iop = InstObjParams(name, Name, 774 "FpRegRegRegOp", 775 { "code": eWalkCode, 776 "r_count": rCount, 777 "predicate_test": predicateTest }, []) 778 header_output += NeonRegRegRegOpDeclare.subst(iop) 779 exec_output += NeonEqualRegExecute.subst(iop) 780 for type in types: 781 substDict = { "targs" : type, 782 "class_name" : Name } 783 exec_output += NeonExecDeclare.subst(substDict) 784 785 def threeUnequalRegInst(name, Name, types, op, 786 bigSrc1, bigSrc2, bigDest, readDest): 787 global header_output, exec_output 788 src1Cnt = src2Cnt = destCnt = 2 789 src1Prefix = src2Prefix = destPrefix = '' 790 if bigSrc1: 791 src1Cnt = 4 792 src1Prefix = 'Big' 793 if bigSrc2: 794 src2Cnt = 4 795 src2Prefix = 'Big' 796 if bigDest: 797 destCnt = 4 798 destPrefix = 'Big' 799 eWalkCode = simdEnabledCheckCode + ''' 800 %sRegVect srcReg1; 801 %sRegVect srcReg2; 802 %sRegVect destReg; 803 ''' % 
(src1Prefix, src2Prefix, destPrefix) 804 for reg in range(src1Cnt): 805 eWalkCode += ''' 806 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 807 ''' % { "reg" : reg } 808 for reg in range(src2Cnt): 809 eWalkCode += ''' 810 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 811 ''' % { "reg" : reg } 812 if readDest: 813 for reg in range(destCnt): 814 eWalkCode += ''' 815 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 816 ''' % { "reg" : reg } 817 readDestCode = '' 818 if readDest: 819 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 820 eWalkCode += ''' 821 for (unsigned i = 0; i < eCount; i++) { 822 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 823 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]); 824 %(destPrefix)sElement destElem; 825 %(readDest)s 826 %(op)s 827 destReg.elements[i] = htog(destElem); 828 } 829 ''' % { "op" : op, "readDest" : readDestCode, 830 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 831 "destPrefix" : destPrefix } 832 for reg in range(destCnt): 833 eWalkCode += ''' 834 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 835 ''' % { "reg" : reg } 836 iop = InstObjParams(name, Name, 837 "RegRegRegOp", 838 { "code": eWalkCode, 839 "r_count": 2, 840 "predicate_test": predicateTest }, []) 841 header_output += NeonRegRegRegOpDeclare.subst(iop) 842 exec_output += NeonUnequalRegExecute.subst(iop) 843 for type in types: 844 substDict = { "targs" : type, 845 "class_name" : Name } 846 exec_output += NeonExecDeclare.subst(substDict) 847 848 def threeRegNarrowInst(name, Name, types, op, readDest=False): 849 threeUnequalRegInst(name, Name, types, op, 850 True, True, False, readDest) 851 852 def threeRegLongInst(name, Name, types, op, readDest=False): 853 threeUnequalRegInst(name, Name, types, op, 854 False, False, True, readDest) 855 856 def threeRegWideInst(name, Name, types, op, readDest=False): 857 threeUnequalRegInst(name, Name, types, op, 858 True, False, True, readDest) 859 860 def twoEqualRegInst(name, Name, types, 
rCount, op, readDest=False): 861 global header_output, exec_output 862 eWalkCode = simdEnabledCheckCode + ''' 863 RegVect srcReg1, srcReg2, destReg; 864 ''' 865 for reg in range(rCount): 866 eWalkCode += ''' 867 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 868 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 869 ''' % { "reg" : reg } 870 if readDest: 871 eWalkCode += ''' 872 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 873 ''' % { "reg" : reg } 874 readDestCode = '' 875 if readDest: 876 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 877 eWalkCode += ''' 878 assert(imm >= 0 && imm < eCount); 879 for (unsigned i = 0; i < eCount; i++) { 880 Element srcElem1 = gtoh(srcReg1.elements[i]); 881 Element srcElem2 = gtoh(srcReg2.elements[imm]); 882 Element destElem; 883 %(readDest)s 884 %(op)s 885 destReg.elements[i] = htog(destElem); 886 } 887 ''' % { "op" : op, "readDest" : readDestCode } 888 for reg in range(rCount): 889 eWalkCode += ''' 890 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 891 ''' % { "reg" : reg } 892 iop = InstObjParams(name, Name, 893 "RegRegRegImmOp", 894 { "code": eWalkCode, 895 "r_count": rCount, 896 "predicate_test": predicateTest }, []) 897 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 898 exec_output += NeonEqualRegExecute.subst(iop) 899 for type in types: 900 substDict = { "targs" : type, 901 "class_name" : Name } 902 exec_output += NeonExecDeclare.subst(substDict) 903 904 def twoRegLongInst(name, Name, types, op, readDest=False): 905 global header_output, exec_output 906 rCount = 2 907 eWalkCode = simdEnabledCheckCode + ''' 908 RegVect srcReg1, srcReg2; 909 BigRegVect destReg; 910 ''' 911 for reg in range(rCount): 912 eWalkCode += ''' 913 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 914 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);; 915 ''' % { "reg" : reg } 916 if readDest: 917 for reg in range(2 * rCount): 918 eWalkCode += ''' 919 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 920 ''' % { "reg" : reg } 921 
readDestCode = '' 922 if readDest: 923 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 924 eWalkCode += ''' 925 assert(imm >= 0 && imm < eCount); 926 for (unsigned i = 0; i < eCount; i++) { 927 Element srcElem1 = gtoh(srcReg1.elements[i]); 928 Element srcElem2 = gtoh(srcReg2.elements[imm]); 929 BigElement destElem; 930 %(readDest)s 931 %(op)s 932 destReg.elements[i] = htog(destElem); 933 } 934 ''' % { "op" : op, "readDest" : readDestCode } 935 for reg in range(2 * rCount): 936 eWalkCode += ''' 937 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 938 ''' % { "reg" : reg } 939 iop = InstObjParams(name, Name, 940 "RegRegRegImmOp", 941 { "code": eWalkCode, 942 "r_count": rCount, 943 "predicate_test": predicateTest }, []) 944 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 945 exec_output += NeonUnequalRegExecute.subst(iop) 946 for type in types: 947 substDict = { "targs" : type, 948 "class_name" : Name } 949 exec_output += NeonExecDeclare.subst(substDict) 950 951 def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False): 952 global header_output, exec_output 953 eWalkCode = simdEnabledCheckCode + ''' 954 typedef FloatReg FloatVect[rCount]; 955 FloatVect srcRegs1, srcRegs2, destRegs; 956 ''' 957 for reg in range(rCount): 958 eWalkCode += ''' 959 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 960 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 961 ''' % { "reg" : reg } 962 if readDest: 963 eWalkCode += ''' 964 destRegs[%(reg)d] = FpDestP%(reg)d; 965 ''' % { "reg" : reg } 966 readDestCode = '' 967 if readDest: 968 readDestCode = 'destReg = destRegs[i];' 969 eWalkCode += ''' 970 assert(imm >= 0 && imm < rCount); 971 for (unsigned i = 0; i < rCount; i++) { 972 FloatReg srcReg1 = srcRegs1[i]; 973 FloatReg srcReg2 = srcRegs2[imm]; 974 FloatReg destReg; 975 %(readDest)s 976 %(op)s 977 destRegs[i] = destReg; 978 } 979 ''' % { "op" : op, "readDest" : readDestCode } 980 for reg in range(rCount): 981 eWalkCode += ''' 982 FpDestP%(reg)d = destRegs[%(reg)d]; 983 ''' % { "reg" : reg } 
984 iop = InstObjParams(name, Name, 985 "FpRegRegRegImmOp", 986 { "code": eWalkCode, 987 "r_count": rCount, 988 "predicate_test": predicateTest }, []) 989 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 990 exec_output += NeonEqualRegExecute.subst(iop) 991 for type in types: 992 substDict = { "targs" : type, 993 "class_name" : Name } 994 exec_output += NeonExecDeclare.subst(substDict) 995 996 def twoRegShiftInst(name, Name, types, rCount, op, 997 readDest=False, toInt=False, fromInt=False): 998 global header_output, exec_output 999 eWalkCode = simdEnabledCheckCode + ''' 1000 RegVect srcRegs1, destRegs; 1001 ''' 1002 for reg in range(rCount): 1003 eWalkCode += ''' 1004 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1005 ''' % { "reg" : reg } 1006 if readDest: 1007 eWalkCode += ''' 1008 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1009 ''' % { "reg" : reg } 1010 readDestCode = '' 1011 if readDest: 1012 readDestCode = 'destElem = gtoh(destRegs.elements[i]);' 1013 if toInt: 1014 readDestCode = 'destReg = gtoh(destRegs.regs[i]);' 1015 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);' 1016 if fromInt: 1017 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);' 1018 declDest = 'Element destElem;' 1019 writeDestCode = 'destRegs.elements[i] = htog(destElem);' 1020 if toInt: 1021 declDest = 'FloatRegBits destReg;' 1022 writeDestCode = 'destRegs.regs[i] = htog(destReg);' 1023 eWalkCode += ''' 1024 for (unsigned i = 0; i < eCount; i++) { 1025 %(readOp)s 1026 %(declDest)s 1027 %(readDest)s 1028 %(op)s 1029 %(writeDest)s 1030 } 1031 ''' % { "readOp" : readOpCode, 1032 "declDest" : declDest, 1033 "readDest" : readDestCode, 1034 "op" : op, 1035 "writeDest" : writeDestCode } 1036 for reg in range(rCount): 1037 eWalkCode += ''' 1038 FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]); 1039 ''' % { "reg" : reg } 1040 iop = InstObjParams(name, Name, 1041 "RegRegImmOp", 1042 { "code": eWalkCode, 1043 "r_count": rCount, 1044 "predicate_test": predicateTest 
}, []) 1045 header_output += NeonRegRegImmOpDeclare.subst(iop) 1046 exec_output += NeonEqualRegExecute.subst(iop) 1047 for type in types: 1048 substDict = { "targs" : type, 1049 "class_name" : Name } 1050 exec_output += NeonExecDeclare.subst(substDict) 1051 1052 def twoRegNarrowShiftInst(name, Name, types, op, readDest=False): 1053 global header_output, exec_output 1054 eWalkCode = simdEnabledCheckCode + ''' 1055 BigRegVect srcReg1; 1056 RegVect destReg; 1057 ''' 1058 for reg in range(4): 1059 eWalkCode += ''' 1060 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1061 ''' % { "reg" : reg } 1062 if readDest: 1063 for reg in range(2): 1064 eWalkCode += ''' 1065 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1066 ''' % { "reg" : reg } 1067 readDestCode = '' 1068 if readDest: 1069 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1070 eWalkCode += ''' 1071 for (unsigned i = 0; i < eCount; i++) { 1072 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1073 Element destElem; 1074 %(readDest)s 1075 %(op)s 1076 destReg.elements[i] = htog(destElem); 1077 } 1078 ''' % { "op" : op, "readDest" : readDestCode } 1079 for reg in range(2): 1080 eWalkCode += ''' 1081 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1082 ''' % { "reg" : reg } 1083 iop = InstObjParams(name, Name, 1084 "RegRegImmOp", 1085 { "code": eWalkCode, 1086 "r_count": 2, 1087 "predicate_test": predicateTest }, []) 1088 header_output += NeonRegRegImmOpDeclare.subst(iop) 1089 exec_output += NeonUnequalRegExecute.subst(iop) 1090 for type in types: 1091 substDict = { "targs" : type, 1092 "class_name" : Name } 1093 exec_output += NeonExecDeclare.subst(substDict) 1094 1095 def twoRegLongShiftInst(name, Name, types, op, readDest=False): 1096 global header_output, exec_output 1097 eWalkCode = simdEnabledCheckCode + ''' 1098 RegVect srcReg1; 1099 BigRegVect destReg; 1100 ''' 1101 for reg in range(2): 1102 eWalkCode += ''' 1103 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1104 ''' % { "reg" : reg } 1105 if 
readDest: 1106 for reg in range(4): 1107 eWalkCode += ''' 1108 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1109 ''' % { "reg" : reg } 1110 readDestCode = '' 1111 if readDest: 1112 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1113 eWalkCode += ''' 1114 for (unsigned i = 0; i < eCount; i++) { 1115 Element srcElem1 = gtoh(srcReg1.elements[i]); 1116 BigElement destElem; 1117 %(readDest)s 1118 %(op)s 1119 destReg.elements[i] = htog(destElem); 1120 } 1121 ''' % { "op" : op, "readDest" : readDestCode } 1122 for reg in range(4): 1123 eWalkCode += ''' 1124 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1125 ''' % { "reg" : reg } 1126 iop = InstObjParams(name, Name, 1127 "RegRegImmOp", 1128 { "code": eWalkCode, 1129 "r_count": 2, 1130 "predicate_test": predicateTest }, []) 1131 header_output += NeonRegRegImmOpDeclare.subst(iop) 1132 exec_output += NeonUnequalRegExecute.subst(iop) 1133 for type in types: 1134 substDict = { "targs" : type, 1135 "class_name" : Name } 1136 exec_output += NeonExecDeclare.subst(substDict) 1137 1138 def twoRegMiscInst(name, Name, types, rCount, op, readDest=False): 1139 global header_output, exec_output 1140 eWalkCode = simdEnabledCheckCode + ''' 1141 RegVect srcReg1, destReg; 1142 ''' 1143 for reg in range(rCount): 1144 eWalkCode += ''' 1145 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1146 ''' % { "reg" : reg } 1147 if readDest: 1148 eWalkCode += ''' 1149 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1150 ''' % { "reg" : reg } 1151 readDestCode = '' 1152 if readDest: 1153 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1154 eWalkCode += ''' 1155 for (unsigned i = 0; i < eCount; i++) { 1156 unsigned j = i; 1157 Element srcElem1 = gtoh(srcReg1.elements[i]); 1158 Element destElem; 1159 %(readDest)s 1160 %(op)s 1161 destReg.elements[j] = htog(destElem); 1162 } 1163 ''' % { "op" : op, "readDest" : readDestCode } 1164 for reg in range(rCount): 1165 eWalkCode += ''' 1166 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1167 
''' % { "reg" : reg } 1168 iop = InstObjParams(name, Name, 1169 "RegRegOp", 1170 { "code": eWalkCode, 1171 "r_count": rCount, 1172 "predicate_test": predicateTest }, []) 1173 header_output += NeonRegRegOpDeclare.subst(iop) 1174 exec_output += NeonEqualRegExecute.subst(iop) 1175 for type in types: 1176 substDict = { "targs" : type, 1177 "class_name" : Name } 1178 exec_output += NeonExecDeclare.subst(substDict) 1179 1180 def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False): 1181 global header_output, exec_output 1182 eWalkCode = simdEnabledCheckCode + ''' 1183 RegVect srcReg1, destReg; 1184 ''' 1185 for reg in range(rCount): 1186 eWalkCode += ''' 1187 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1188 ''' % { "reg" : reg } 1189 if readDest: 1190 eWalkCode += ''' 1191 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1192 ''' % { "reg" : reg } 1193 readDestCode = '' 1194 if readDest: 1195 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1196 eWalkCode += ''' 1197 for (unsigned i = 0; i < eCount; i++) { 1198 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1199 Element destElem; 1200 %(readDest)s 1201 %(op)s 1202 destReg.elements[i] = htog(destElem); 1203 } 1204 ''' % { "op" : op, "readDest" : readDestCode } 1205 for reg in range(rCount): 1206 eWalkCode += ''' 1207 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1208 ''' % { "reg" : reg } 1209 iop = InstObjParams(name, Name, 1210 "RegRegImmOp", 1211 { "code": eWalkCode, 1212 "r_count": rCount, 1213 "predicate_test": predicateTest }, []) 1214 header_output += NeonRegRegImmOpDeclare.subst(iop) 1215 exec_output += NeonEqualRegExecute.subst(iop) 1216 for type in types: 1217 substDict = { "targs" : type, 1218 "class_name" : Name } 1219 exec_output += NeonExecDeclare.subst(substDict) 1220 1221 def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False): 1222 global header_output, exec_output 1223 eWalkCode = simdEnabledCheckCode + ''' 1224 RegVect srcReg1, destReg; 1225 ''' 1226 for reg in 
range(rCount): 1227 eWalkCode += ''' 1228 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1229 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1230 ''' % { "reg" : reg } 1231 if readDest: 1232 eWalkCode += ''' 1233 ''' % { "reg" : reg } 1234 readDestCode = '' 1235 if readDest: 1236 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1237 eWalkCode += op 1238 for reg in range(rCount): 1239 eWalkCode += ''' 1240 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1241 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]); 1242 ''' % { "reg" : reg } 1243 iop = InstObjParams(name, Name, 1244 "RegRegOp", 1245 { "code": eWalkCode, 1246 "r_count": rCount, 1247 "predicate_test": predicateTest }, []) 1248 header_output += NeonRegRegOpDeclare.subst(iop) 1249 exec_output += NeonEqualRegExecute.subst(iop) 1250 for type in types: 1251 substDict = { "targs" : type, 1252 "class_name" : Name } 1253 exec_output += NeonExecDeclare.subst(substDict) 1254 1255 def twoRegMiscInstFp(name, Name, types, rCount, op, 1256 readDest=False, toInt=False): 1257 global header_output, exec_output 1258 eWalkCode = simdEnabledCheckCode + ''' 1259 typedef FloatReg FloatVect[rCount]; 1260 FloatVect srcRegs1; 1261 ''' 1262 if toInt: 1263 eWalkCode += 'RegVect destRegs;\n' 1264 else: 1265 eWalkCode += 'FloatVect destRegs;\n' 1266 for reg in range(rCount): 1267 eWalkCode += ''' 1268 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1269 ''' % { "reg" : reg } 1270 if readDest: 1271 if toInt: 1272 eWalkCode += ''' 1273 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1274 ''' % { "reg" : reg } 1275 else: 1276 eWalkCode += ''' 1277 destRegs[%(reg)d] = FpDestP%(reg)d; 1278 ''' % { "reg" : reg } 1279 readDestCode = '' 1280 if readDest: 1281 readDestCode = 'destReg = destRegs[i];' 1282 destType = 'FloatReg' 1283 writeDest = 'destRegs[r] = destReg;' 1284 if toInt: 1285 destType = 'FloatRegBits' 1286 writeDest = 'destRegs.regs[r] = destReg;' 1287 eWalkCode += ''' 1288 for (unsigned r = 0; r < rCount; r++) { 1289 FloatReg srcReg1 = 
srcRegs1[r]; 1290 %(destType)s destReg; 1291 %(readDest)s 1292 %(op)s 1293 %(writeDest)s 1294 } 1295 ''' % { "op" : op, 1296 "readDest" : readDestCode, 1297 "destType" : destType, 1298 "writeDest" : writeDest } 1299 for reg in range(rCount): 1300 if toInt: 1301 eWalkCode += ''' 1302 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d]; 1303 ''' % { "reg" : reg } 1304 else: 1305 eWalkCode += ''' 1306 FpDestP%(reg)d = destRegs[%(reg)d]; 1307 ''' % { "reg" : reg } 1308 iop = InstObjParams(name, Name, 1309 "FpRegRegOp", 1310 { "code": eWalkCode, 1311 "r_count": rCount, 1312 "predicate_test": predicateTest }, []) 1313 header_output += NeonRegRegOpDeclare.subst(iop) 1314 exec_output += NeonEqualRegExecute.subst(iop) 1315 for type in types: 1316 substDict = { "targs" : type, 1317 "class_name" : Name } 1318 exec_output += NeonExecDeclare.subst(substDict) 1319 1320 def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False): 1321 global header_output, exec_output 1322 eWalkCode = simdEnabledCheckCode + ''' 1323 RegVect srcRegs; 1324 BigRegVect destReg; 1325 ''' 1326 for reg in range(rCount): 1327 eWalkCode += ''' 1328 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1329 ''' % { "reg" : reg } 1330 if readDest: 1331 eWalkCode += ''' 1332 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1333 ''' % { "reg" : reg } 1334 readDestCode = '' 1335 if readDest: 1336 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1337 eWalkCode += ''' 1338 for (unsigned i = 0; i < eCount / 2; i++) { 1339 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1340 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1341 BigElement destElem; 1342 %(readDest)s 1343 %(op)s 1344 destReg.elements[i] = htog(destElem); 1345 } 1346 ''' % { "op" : op, "readDest" : readDestCode } 1347 for reg in range(rCount): 1348 eWalkCode += ''' 1349 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1350 ''' % { "reg" : reg } 1351 iop = InstObjParams(name, Name, 1352 "RegRegOp", 1353 { "code": eWalkCode, 1354 "r_count": 
rCount, 1355 "predicate_test": predicateTest }, []) 1356 header_output += NeonRegRegOpDeclare.subst(iop) 1357 exec_output += NeonUnequalRegExecute.subst(iop) 1358 for type in types: 1359 substDict = { "targs" : type, 1360 "class_name" : Name } 1361 exec_output += NeonExecDeclare.subst(substDict) 1362 1363 def twoRegNarrowMiscInst(name, Name, types, op, readDest=False): 1364 global header_output, exec_output 1365 eWalkCode = simdEnabledCheckCode + ''' 1366 BigRegVect srcReg1; 1367 RegVect destReg; 1368 ''' 1369 for reg in range(4): 1370 eWalkCode += ''' 1371 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1372 ''' % { "reg" : reg } 1373 if readDest: 1374 for reg in range(2): 1375 eWalkCode += ''' 1376 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1377 ''' % { "reg" : reg } 1378 readDestCode = '' 1379 if readDest: 1380 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1381 eWalkCode += ''' 1382 for (unsigned i = 0; i < eCount; i++) { 1383 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1384 Element destElem; 1385 %(readDest)s 1386 %(op)s 1387 destReg.elements[i] = htog(destElem); 1388 } 1389 ''' % { "op" : op, "readDest" : readDestCode } 1390 for reg in range(2): 1391 eWalkCode += ''' 1392 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1393 ''' % { "reg" : reg } 1394 iop = InstObjParams(name, Name, 1395 "RegRegOp", 1396 { "code": eWalkCode, 1397 "r_count": 2, 1398 "predicate_test": predicateTest }, []) 1399 header_output += NeonRegRegOpDeclare.subst(iop) 1400 exec_output += NeonUnequalRegExecute.subst(iop) 1401 for type in types: 1402 substDict = { "targs" : type, 1403 "class_name" : Name } 1404 exec_output += NeonExecDeclare.subst(substDict) 1405 1406 def oneRegImmInst(name, Name, types, rCount, op, readDest=False): 1407 global header_output, exec_output 1408 eWalkCode = simdEnabledCheckCode + ''' 1409 RegVect destReg; 1410 ''' 1411 if readDest: 1412 for reg in range(rCount): 1413 eWalkCode += ''' 1414 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1415 
''' % { "reg" : reg } 1416 readDestCode = '' 1417 if readDest: 1418 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1419 eWalkCode += ''' 1420 for (unsigned i = 0; i < eCount; i++) { 1421 Element destElem; 1422 %(readDest)s 1423 %(op)s 1424 destReg.elements[i] = htog(destElem); 1425 } 1426 ''' % { "op" : op, "readDest" : readDestCode } 1427 for reg in range(rCount): 1428 eWalkCode += ''' 1429 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1430 ''' % { "reg" : reg } 1431 iop = InstObjParams(name, Name, 1432 "RegImmOp", 1433 { "code": eWalkCode, 1434 "r_count": rCount, 1435 "predicate_test": predicateTest }, []) 1436 header_output += NeonRegImmOpDeclare.subst(iop) 1437 exec_output += NeonEqualRegExecute.subst(iop) 1438 for type in types: 1439 substDict = { "targs" : type, 1440 "class_name" : Name } 1441 exec_output += NeonExecDeclare.subst(substDict) 1442 1443 def twoRegLongMiscInst(name, Name, types, op, readDest=False): 1444 global header_output, exec_output 1445 eWalkCode = simdEnabledCheckCode + ''' 1446 RegVect srcReg1; 1447 BigRegVect destReg; 1448 ''' 1449 for reg in range(2): 1450 eWalkCode += ''' 1451 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1452 ''' % { "reg" : reg } 1453 if readDest: 1454 for reg in range(4): 1455 eWalkCode += ''' 1456 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1457 ''' % { "reg" : reg } 1458 readDestCode = '' 1459 if readDest: 1460 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1461 eWalkCode += ''' 1462 for (unsigned i = 0; i < eCount; i++) { 1463 Element srcElem1 = gtoh(srcReg1.elements[i]); 1464 BigElement destElem; 1465 %(readDest)s 1466 %(op)s 1467 destReg.elements[i] = htog(destElem); 1468 } 1469 ''' % { "op" : op, "readDest" : readDestCode } 1470 for reg in range(4): 1471 eWalkCode += ''' 1472 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1473 ''' % { "reg" : reg } 1474 iop = InstObjParams(name, Name, 1475 "RegRegOp", 1476 { "code": eWalkCode, 1477 "r_count": 2, 1478 "predicate_test": predicateTest 
}, []) 1479 header_output += NeonRegRegOpDeclare.subst(iop) 1480 exec_output += NeonUnequalRegExecute.subst(iop) 1481 for type in types: 1482 substDict = { "targs" : type, 1483 "class_name" : Name } 1484 exec_output += NeonExecDeclare.subst(substDict) 1485 1486 vhaddCode = ''' 1487 Element carryBit = 1488 (((unsigned)srcElem1 & 0x1) + 1489 ((unsigned)srcElem2 & 0x1)) >> 1; 1490 // Use division instead of a shift to ensure the sign extension works 1491 // right. The compiler will figure out if it can be a shift. Mask the 1492 // inputs so they get truncated correctly. 1493 destElem = (((srcElem1 & ~(Element)1) / 2) + 1494 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1495 ''' 1496 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode) 1497 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode) 1498 1499 vrhaddCode = ''' 1500 Element carryBit = 1501 (((unsigned)srcElem1 & 0x1) + 1502 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 1503 // Use division instead of a shift to ensure the sign extension works 1504 // right. The compiler will figure out if it can be a shift. Mask the 1505 // inputs so they get truncated correctly. 1506 destElem = (((srcElem1 & ~(Element)1) / 2) + 1507 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1508 ''' 1509 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode) 1510 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode) 1511 1512 vhsubCode = ''' 1513 Element barrowBit = 1514 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 1515 // Use division instead of a shift to ensure the sign extension works 1516 // right. The compiler will figure out if it can be a shift. Mask the 1517 // inputs so they get truncated correctly. 
1518 destElem = (((srcElem1 & ~(Element)1) / 2) - 1519 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 1520 ''' 1521 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode) 1522 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode) 1523 1524 vandCode = ''' 1525 destElem = srcElem1 & srcElem2; 1526 ''' 1527 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode) 1528 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode) 1529 1530 vbicCode = ''' 1531 destElem = srcElem1 & ~srcElem2; 1532 ''' 1533 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode) 1534 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode) 1535 1536 vorrCode = ''' 1537 destElem = srcElem1 | srcElem2; 1538 ''' 1539 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode) 1540 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode) 1541 1542 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode) 1543 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode) 1544 1545 vornCode = ''' 1546 destElem = srcElem1 | ~srcElem2; 1547 ''' 1548 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode) 1549 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode) 1550 1551 veorCode = ''' 1552 destElem = srcElem1 ^ srcElem2; 1553 ''' 1554 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode) 1555 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode) 1556 1557 vbifCode = ''' 1558 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 1559 ''' 1560 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True) 1561 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True) 1562 vbitCode = ''' 1563 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 1564 ''' 1565 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True) 1566 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True) 1567 vbslCode = ''' 1568 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 1569 ''' 
# Instantiate vbsl with the body defined just above; the trailing True is
# passed because that body reads the previous destination value.
threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)

# Element-wise maximum / minimum.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)

# Add, pairwise add, long/wide add, and the (rounding) high-half narrowing
# adds.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
                  2, vaddCode, pairwise=True)
threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
                  4, vaddCode, pairwise=True)
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)

# Subtract and long/wide subtract.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)

# Unsigned saturating add: a wrapped sum is replaced by all ones and
# fpscr.qc is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
# (Rounding) high-half narrowing subtracts.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)

# Signed saturating add: overflow is detected from the operand and result
# signs; the result is then clamped and fpscr.qc is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)

# Unsigned saturating subtract: a wrapped difference is replaced by 0 and
# fpscr.qc is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)

# Signed saturating subtract, same clamping scheme as the signed add.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)

# Comparisons produce an all-ones element when true and 0 otherwise.
vcgtCode = '''
    destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)

vcgeCode = '''
    destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)

vceqCode = '''
    destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)

# Register-controlled shifts.  The shift amount is the low byte of srcElem2
# read as signed: negative amounts shift right, the others shift left.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)

# Rounding variant: on right shifts the last bit shifted out (rBit) is added
# to the result.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)

# Saturating variants: a left shift that would lose set bits clamps the
# result and sets fpscr.qc.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)

vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)

# Saturating and rounding variants combine the two behaviours above.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)

vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)

# Absolute difference, plain and accumulating (the accumulating forms pass
# True because they add into the previous destination value).
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)

vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)

# Bit test: all ones when the operands share any set bit.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)

# Multiply and multiply-accumulate.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)

vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)

# Saturating doubling multiply-accumulate long: the doubled product is
# clamped first, then the accumulation is clamped; either sets fpscr.qc.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = (destElem < 0);
    destElem += midElem;
    bool negDest = (destElem < 0);
    bool negMid = (midElem < 0);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)

# Saturating doubling multiply-subtract long.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = (destElem < 0);
    destElem -= midElem;
    bool negDest = (destElem < 0);
    bool posMid = (midElem > 0);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)

# Saturating doubling multiply long.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)

# Multiply-subtract.
vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)

# Polynomial multiply: XOR together srcElem1 shifted left by the position
# of every set bit of srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)

# Pairwise max / min reuse the element-wise bodies.
threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)

# Saturating doubling multiply returning the high half.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)

# Rounding variant of the above.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        if (destElem < 0) {
            destElem = mask(sizeof(Element) * 8 - 1);
        } else {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        }
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        smallSignedTypes, 4, vqrdmulhCode)

# Floating point max / min: processNans() runs first and binaryOp() is only
# used when it did not already produce the result.
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)

vminfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)

threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
                    2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
                    4, vmaxfpCode, pairwise=True)

threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
                    2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
                    4, vminfpCode, pairwise=True)

# Floating point add (plain and pairwise) and subtract.
vaddfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)

threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
                    4, vaddfpCode, pairwise=True)

vsubfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
def _forDQ(instFn, mnem, nameFmt, types, code, *extra, **kw):
    '''Register the D (2 register) and then the Q (4 register) variant of
    one instruction.  nameFmt contains a single %s which is replaced by
    "D" or "Q" to form the class name; any remaining positional or
    keyword arguments are handed to instFn unchanged.'''
    for sfx, regs in (("D", 2), ("Q", 4)):
        instFn(mnem, nameFmt % sfx, types, regs, code, *extra, **kw)

# Floating point multiply.
vmulfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vmul", "NVmul%sFp", ("float",), vmulfpCode)

# Floating point multiply-accumulate: the product is computed first and
# then added to the current destination value.
vmlafpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vmla", "NVmla%sFp", ("float",),
       vmlafpCode, True)

# Floating point multiply-subtract: the product is subtracted from the
# current destination value.
vmlsfpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vmls", "NVmls%sFp", ("float",),
       vmlsfpCode, True)

# Floating point compares.  Each snippet writes -1 to the destination
# when the helper returns 0 and writes 0 otherwise; a helper result of
# 2.0 additionally raises fpscr.ioc.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vcgt", "Vcgt%sFp", ("float",),
       vcgtfpCode, toInt = True)

vcgefpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vcge", "Vcge%sFp", ("float",),
       vcgefpCode, toInt = True)

vacgtfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vacgt", "Vacgt%sFp", ("float",),
       vacgtfpCode, toInt = True)

vacgefpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vacge", "Vacge%sFp", ("float",),
       vacgefpCode, toInt = True)

vceqfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vceq", "Vceq%sFp", ("float",),
       vceqfpCode, toInt = True)

# vrecps / vrsqrts: delegate to the fpRecpsS / fpRSqrtsS helpers.
vrecpsCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vrecps", "Vrecps%sFp", ("float",), vrecpsCode)

vrsqrtsCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vrsqrts", "Vrsqrts%sFp", ("float",),
       vrsqrtsCode)

# Floating point absolute difference: subtract, then take fabs().
vabdfpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    Fpscr = fpscr;
'''
_forDQ(threeEqualRegInstFp, "vabd", "Vabd%sFp", ("float",), vabdfpCode)

# Two-register forms of vmla / vmlal, built from the same snippets as
# the three-register versions.
_forDQ(twoEqualRegInst, "vmla", "Vmlas%s", unsignedTypes, vmlaCode, True)
_forDQ(twoEqualRegInstFp, "vmla", "Vmlas%sFp", ("float",),
       vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)

# Two-register forms of vmls / vmlsl.
_forDQ(twoEqualRegInst, "vmls", "Vmlss%s", allTypes, vmlsCode, True)
_forDQ(twoEqualRegInstFp, "vmls", "Vmlss%sFp", ("float",),
       vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)

2346 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode) 2347 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode) 2348 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode) 2349 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode) 2350 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode) 2351 2352 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode) 2353 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True) 2354 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True) 2355 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode) 2356 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode) 2357 twoEqualRegInst("vqrdmulh", "VqrdmulhsD", 2358 smallSignedTypes, 2, vqrdmulhCode) 2359 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", 2360 smallSignedTypes, 4, vqrdmulhCode) 2361 2362 vshrCode = ''' 2363 if (imm >= sizeof(srcElem1) * 8) { 2364 if (srcElem1 < 0) 2365 destElem = -1; 2366 else 2367 destElem = 0; 2368 } else { 2369 destElem = srcElem1 >> imm; 2370 } 2371 ''' 2372 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode) 2373 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode) 2374 2375 vsraCode = ''' 2376 Element mid;; 2377 if (imm >= sizeof(srcElem1) * 8) { 2378 mid = (srcElem1 < 0) ? 
-1 : 0; 2379 } else { 2380 mid = srcElem1 >> imm; 2381 if (srcElem1 < 0 && mid >= 0) { 2382 mid |= -(mid & ((Element)1 << 2383 (sizeof(Element) * 8 - 1 - imm))); 2384 } 2385 } 2386 destElem += mid; 2387 ''' 2388 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True) 2389 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True) 2390 2391 vrshrCode = ''' 2392 if (imm > sizeof(srcElem1) * 8) { 2393 destElem = 0; 2394 } else if (imm) { 2395 Element rBit = bits(srcElem1, imm - 1); 2396 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2397 } else { 2398 destElem = srcElem1; 2399 } 2400 ''' 2401 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode) 2402 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode) 2403 2404 vrsraCode = ''' 2405 if (imm > sizeof(srcElem1) * 8) { 2406 destElem += 0; 2407 } else if (imm) { 2408 Element rBit = bits(srcElem1, imm - 1); 2409 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2410 } else { 2411 destElem += srcElem1; 2412 } 2413 ''' 2414 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True) 2415 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True) 2416 2417 vsriCode = ''' 2418 if (imm >= sizeof(Element) * 8) 2419 destElem = destElem; 2420 else 2421 destElem = (srcElem1 >> imm) | 2422 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2423 ''' 2424 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True) 2425 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True) 2426 2427 vshlCode = ''' 2428 if (imm >= sizeof(Element) * 8) 2429 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; 2430 else 2431 destElem = srcElem1 << imm; 2432 ''' 2433 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode) 2434 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode) 2435 2436 vsliCode = ''' 2437 if (imm >= sizeof(Element) * 8) 2438 destElem = destElem; 2439 else 2440 destElem = (srcElem1 << imm) | (destElem & mask(imm)); 2441 ''' 2442 twoRegShiftInst("vsli", 
"NVsliD", unsignedTypes, 2, vsliCode, True) 2443 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True) 2444 2445 vqshlCode = ''' 2446 FPSCR fpscr = (FPSCR)Fpscr; 2447 if (imm >= sizeof(Element) * 8) { 2448 if (srcElem1 != 0) { 2449 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2450 if (srcElem1 > 0) 2451 destElem = ~destElem; 2452 fpscr.qc = 1; 2453 } else { 2454 destElem = 0; 2455 } 2456 } else if (imm) { 2457 destElem = (srcElem1 << imm); 2458 uint64_t topBits = bits((uint64_t)srcElem1, 2459 sizeof(Element) * 8 - 1, 2460 sizeof(Element) * 8 - 1 - imm); 2461 if (topBits != 0 && topBits != mask(imm + 1)) { 2462 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2463 if (srcElem1 > 0) 2464 destElem = ~destElem; 2465 fpscr.qc = 1; 2466 } 2467 } else { 2468 destElem = srcElem1; 2469 } 2470 Fpscr = fpscr; 2471 ''' 2472 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode) 2473 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode) 2474 2475 vqshluCode = ''' 2476 FPSCR fpscr = (FPSCR)Fpscr; 2477 if (imm >= sizeof(Element) * 8) { 2478 if (srcElem1 != 0) { 2479 destElem = mask(sizeof(Element) * 8); 2480 fpscr.qc = 1; 2481 } else { 2482 destElem = 0; 2483 } 2484 } else if (imm) { 2485 destElem = (srcElem1 << imm); 2486 uint64_t topBits = bits((uint64_t)srcElem1, 2487 sizeof(Element) * 8 - 1, 2488 sizeof(Element) * 8 - imm); 2489 if (topBits != 0) { 2490 destElem = mask(sizeof(Element) * 8); 2491 fpscr.qc = 1; 2492 } 2493 } else { 2494 destElem = srcElem1; 2495 } 2496 Fpscr = fpscr; 2497 ''' 2498 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode) 2499 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode) 2500 2501 vqshlusCode = ''' 2502 FPSCR fpscr = (FPSCR)Fpscr; 2503 if (imm >= sizeof(Element) * 8) { 2504 if (srcElem1 < 0) { 2505 destElem = 0; 2506 fpscr.qc = 1; 2507 } else if (srcElem1 > 0) { 2508 destElem = mask(sizeof(Element) * 8); 2509 fpscr.qc = 1; 2510 } else { 2511 destElem = 0; 2512 } 2513 } 
else if (imm) { 2514 destElem = (srcElem1 << imm); 2515 uint64_t topBits = bits((uint64_t)srcElem1, 2516 sizeof(Element) * 8 - 1, 2517 sizeof(Element) * 8 - imm); 2518 if (srcElem1 < 0) { 2519 destElem = 0; 2520 fpscr.qc = 1; 2521 } else if (topBits != 0) { 2522 destElem = mask(sizeof(Element) * 8); 2523 fpscr.qc = 1; 2524 } 2525 } else { 2526 if (srcElem1 < 0) { 2527 fpscr.qc = 1; 2528 destElem = 0; 2529 } else { 2530 destElem = srcElem1; 2531 } 2532 } 2533 Fpscr = fpscr; 2534 ''' 2535 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode) 2536 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode) 2537 2538 vshrnCode = ''' 2539 if (imm >= sizeof(srcElem1) * 8) { 2540 destElem = 0; 2541 } else { 2542 destElem = srcElem1 >> imm; 2543 } 2544 ''' 2545 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode) 2546 2547 vrshrnCode = ''' 2548 if (imm > sizeof(srcElem1) * 8) { 2549 destElem = 0; 2550 } else if (imm) { 2551 Element rBit = bits(srcElem1, imm - 1); 2552 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2553 } else { 2554 destElem = srcElem1; 2555 } 2556 ''' 2557 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode) 2558 2559 vqshrnCode = ''' 2560 FPSCR fpscr = (FPSCR)Fpscr; 2561 if (imm > sizeof(srcElem1) * 8) { 2562 if (srcElem1 != 0 && srcElem1 != -1) 2563 fpscr.qc = 1; 2564 destElem = 0; 2565 } else if (imm) { 2566 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2567 mid |= -(mid & ((BigElement)1 << 2568 (sizeof(BigElement) * 8 - 1 - imm))); 2569 if (mid != (Element)mid) { 2570 destElem = mask(sizeof(Element) * 8 - 1); 2571 if (srcElem1 < 0) 2572 destElem = ~destElem; 2573 fpscr.qc = 1; 2574 } else { 2575 destElem = mid; 2576 } 2577 } else { 2578 destElem = srcElem1; 2579 } 2580 Fpscr = fpscr; 2581 ''' 2582 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode) 2583 2584 vqshrunCode = ''' 2585 FPSCR fpscr = (FPSCR)Fpscr; 2586 if (imm > sizeof(srcElem1) * 8) { 2587 if 
(srcElem1 != 0) 2588 fpscr.qc = 1; 2589 destElem = 0; 2590 } else if (imm) { 2591 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2592 if (mid != (Element)mid) { 2593 destElem = mask(sizeof(Element) * 8); 2594 fpscr.qc = 1; 2595 } else { 2596 destElem = mid; 2597 } 2598 } else { 2599 destElem = srcElem1; 2600 } 2601 Fpscr = fpscr; 2602 ''' 2603 twoRegNarrowShiftInst("vqshrun", "NVqshrun", 2604 smallUnsignedTypes, vqshrunCode) 2605 2606 vqshrunsCode = ''' 2607 FPSCR fpscr = (FPSCR)Fpscr; 2608 if (imm > sizeof(srcElem1) * 8) { 2609 if (srcElem1 != 0) 2610 fpscr.qc = 1; 2611 destElem = 0; 2612 } else if (imm) { 2613 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2614 if (bits(mid, sizeof(BigElement) * 8 - 1, 2615 sizeof(Element) * 8) != 0) { 2616 if (srcElem1 < 0) { 2617 destElem = 0; 2618 } else { 2619 destElem = mask(sizeof(Element) * 8); 2620 } 2621 fpscr.qc = 1; 2622 } else { 2623 destElem = mid; 2624 } 2625 } else { 2626 destElem = srcElem1; 2627 } 2628 Fpscr = fpscr; 2629 ''' 2630 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 2631 smallSignedTypes, vqshrunsCode) 2632 2633 vqrshrnCode = ''' 2634 FPSCR fpscr = (FPSCR)Fpscr; 2635 if (imm > sizeof(srcElem1) * 8) { 2636 if (srcElem1 != 0 && srcElem1 != -1) 2637 fpscr.qc = 1; 2638 destElem = 0; 2639 } else if (imm) { 2640 BigElement mid = (srcElem1 >> (imm - 1)); 2641 uint64_t rBit = mid & 0x1; 2642 mid >>= 1; 2643 mid |= -(mid & ((BigElement)1 << 2644 (sizeof(BigElement) * 8 - 1 - imm))); 2645 mid += rBit; 2646 if (mid != (Element)mid) { 2647 destElem = mask(sizeof(Element) * 8 - 1); 2648 if (srcElem1 < 0) 2649 destElem = ~destElem; 2650 fpscr.qc = 1; 2651 } else { 2652 destElem = mid; 2653 } 2654 } else { 2655 if (srcElem1 != (Element)srcElem1) { 2656 destElem = mask(sizeof(Element) * 8 - 1); 2657 if (srcElem1 < 0) 2658 destElem = ~destElem; 2659 fpscr.qc = 1; 2660 } else { 2661 destElem = srcElem1; 2662 } 2663 } 2664 Fpscr = fpscr; 2665 ''' 2666 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 2667 smallSignedTypes, 
vqrshrnCode) 2668 2669 vqrshrunCode = ''' 2670 FPSCR fpscr = (FPSCR)Fpscr; 2671 if (imm > sizeof(srcElem1) * 8) { 2672 if (srcElem1 != 0) 2673 fpscr.qc = 1; 2674 destElem = 0; 2675 } else if (imm) { 2676 BigElement mid = (srcElem1 >> (imm - 1)); 2677 uint64_t rBit = mid & 0x1; 2678 mid >>= 1; 2679 mid += rBit; 2680 if (mid != (Element)mid) { 2681 destElem = mask(sizeof(Element) * 8); 2682 fpscr.qc = 1; 2683 } else { 2684 destElem = mid; 2685 } 2686 } else { 2687 if (srcElem1 != (Element)srcElem1) { 2688 destElem = mask(sizeof(Element) * 8 - 1); 2689 if (srcElem1 < 0) 2690 destElem = ~destElem; 2691 fpscr.qc = 1; 2692 } else { 2693 destElem = srcElem1; 2694 } 2695 } 2696 Fpscr = fpscr; 2697 ''' 2698 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 2699 smallUnsignedTypes, vqrshrunCode) 2700 2701 vqrshrunsCode = ''' 2702 FPSCR fpscr = (FPSCR)Fpscr; 2703 if (imm > sizeof(srcElem1) * 8) { 2704 if (srcElem1 != 0) 2705 fpscr.qc = 1; 2706 destElem = 0; 2707 } else if (imm) { 2708 BigElement mid = (srcElem1 >> (imm - 1)); 2709 uint64_t rBit = mid & 0x1; 2710 mid >>= 1; 2711 mid |= -(mid & ((BigElement)1 << 2712 (sizeof(BigElement) * 8 - 1 - imm))); 2713 mid += rBit; 2714 if (bits(mid, sizeof(BigElement) * 8 - 1, 2715 sizeof(Element) * 8) != 0) { 2716 if (srcElem1 < 0) { 2717 destElem = 0; 2718 } else { 2719 destElem = mask(sizeof(Element) * 8); 2720 } 2721 fpscr.qc = 1; 2722 } else { 2723 destElem = mid; 2724 } 2725 } else { 2726 if (srcElem1 < 0) { 2727 fpscr.qc = 1; 2728 destElem = 0; 2729 } else { 2730 destElem = srcElem1; 2731 } 2732 } 2733 Fpscr = fpscr; 2734 ''' 2735 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 2736 smallSignedTypes, vqrshrunsCode) 2737 2738 vshllCode = ''' 2739 if (imm >= sizeof(destElem) * 8) { 2740 destElem = 0; 2741 } else { 2742 destElem = (BigElement)srcElem1 << imm; 2743 } 2744 ''' 2745 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode) 2746 2747 vmovlCode = ''' 2748 destElem = srcElem1; 2749 ''' 2750 twoRegLongShiftInst("vmovl", 
"NVmovl", smallTypes, vmovlCode) 2751 2752 vcvt2ufxCode = ''' 2753 FPSCR fpscr = Fpscr; 2754 if (flushToZero(srcElem1)) 2755 fpscr.idc = 1; 2756 VfpSavedState state = prepFpState(VfpRoundNearest); 2757 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2758 destReg = vfpFpSToFixed(srcElem1, false, false, imm); 2759 __asm__ __volatile__("" :: "m" (destReg)); 2760 finishVfp(fpscr, state, true); 2761 Fpscr = fpscr; 2762 ''' 2763 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",), 2764 2, vcvt2ufxCode, toInt = True) 2765 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",), 2766 4, vcvt2ufxCode, toInt = True) 2767 2768 vcvt2sfxCode = ''' 2769 FPSCR fpscr = Fpscr; 2770 if (flushToZero(srcElem1)) 2771 fpscr.idc = 1; 2772 VfpSavedState state = prepFpState(VfpRoundNearest); 2773 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2774 destReg = vfpFpSToFixed(srcElem1, true, false, imm); 2775 __asm__ __volatile__("" :: "m" (destReg)); 2776 finishVfp(fpscr, state, true); 2777 Fpscr = fpscr; 2778 ''' 2779 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",), 2780 2, vcvt2sfxCode, toInt = True) 2781 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",), 2782 4, vcvt2sfxCode, toInt = True) 2783 2784 vcvtu2fpCode = ''' 2785 FPSCR fpscr = Fpscr; 2786 VfpSavedState state = prepFpState(VfpRoundNearest); 2787 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2788 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); 2789 __asm__ __volatile__("" :: "m" (destElem)); 2790 finishVfp(fpscr, state, true); 2791 Fpscr = fpscr; 2792 ''' 2793 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",), 2794 2, vcvtu2fpCode, fromInt = True) 2795 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",), 2796 4, vcvtu2fpCode, fromInt = True) 2797 2798 vcvts2fpCode = ''' 2799 FPSCR fpscr = Fpscr; 2800 VfpSavedState state = prepFpState(VfpRoundNearest); 2801 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2802 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); 2803 __asm__ 
__volatile__("" :: "m" (destElem)); 2804 finishVfp(fpscr, state, true); 2805 Fpscr = fpscr; 2806 ''' 2807 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",), 2808 2, vcvts2fpCode, fromInt = True) 2809 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",), 2810 4, vcvts2fpCode, fromInt = True) 2811 2812 vcvts2hCode = ''' 2813 FPSCR fpscr = Fpscr; 2814 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 2815 if (flushToZero(srcFp1)) 2816 fpscr.idc = 1; 2817 VfpSavedState state = prepFpState(VfpRoundNearest); 2818 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 2819 : "m" (srcFp1), "m" (destElem)); 2820 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 2821 fpscr.ahp, srcFp1); 2822 __asm__ __volatile__("" :: "m" (destElem)); 2823 finishVfp(fpscr, state, true); 2824 Fpscr = fpscr; 2825 ''' 2826 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode) 2827 2828 vcvth2sCode = ''' 2829 FPSCR fpscr = Fpscr; 2830 VfpSavedState state = prepFpState(VfpRoundNearest); 2831 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 2832 : "m" (srcElem1), "m" (destElem)); 2833 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 2834 __asm__ __volatile__("" :: "m" (destElem)); 2835 finishVfp(fpscr, state, true); 2836 Fpscr = fpscr; 2837 ''' 2838 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode) 2839 2840 vrsqrteCode = ''' 2841 destElem = unsignedRSqrtEstimate(srcElem1); 2842 ''' 2843 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode) 2844 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode) 2845 2846 vrsqrtefpCode = ''' 2847 FPSCR fpscr = Fpscr; 2848 if (flushToZero(srcReg1)) 2849 fpscr.idc = 1; 2850 destReg = fprSqrtEstimate(fpscr, srcReg1); 2851 Fpscr = fpscr; 2852 ''' 2853 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode) 2854 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode) 2855 2856 vrecpeCode = ''' 2857 destElem = 
unsignedRecipEstimate(srcElem1); 2858 ''' 2859 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode) 2860 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode) 2861 2862 vrecpefpCode = ''' 2863 FPSCR fpscr = Fpscr; 2864 if (flushToZero(srcReg1)) 2865 fpscr.idc = 1; 2866 destReg = fpRecipEstimate(fpscr, srcReg1); 2867 Fpscr = fpscr; 2868 ''' 2869 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode) 2870 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode) 2871 2872 vrev16Code = ''' 2873 destElem = srcElem1; 2874 unsigned groupSize = ((1 << 1) / sizeof(Element)); 2875 unsigned reverseMask = (groupSize - 1); 2876 j = i ^ reverseMask; 2877 ''' 2878 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code) 2879 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code) 2880 vrev32Code = ''' 2881 destElem = srcElem1; 2882 unsigned groupSize = ((1 << 2) / sizeof(Element)); 2883 unsigned reverseMask = (groupSize - 1); 2884 j = i ^ reverseMask; 2885 ''' 2886 twoRegMiscInst("vrev32", "NVrev32D", 2887 ("uint8_t", "uint16_t"), 2, vrev32Code) 2888 twoRegMiscInst("vrev32", "NVrev32Q", 2889 ("uint8_t", "uint16_t"), 4, vrev32Code) 2890 vrev64Code = ''' 2891 destElem = srcElem1; 2892 unsigned groupSize = ((1 << 3) / sizeof(Element)); 2893 unsigned reverseMask = (groupSize - 1); 2894 j = i ^ reverseMask; 2895 ''' 2896 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code) 2897 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code) 2898 2899 vpaddlCode = ''' 2900 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2901 ''' 2902 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode) 2903 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode) 2904 2905 vpadalCode = ''' 2906 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 2907 ''' 2908 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True) 2909 twoRegCondenseInst("vpadal", 
"NVpadalQ", smallTypes, 4, vpadalCode, True) 2910 2911 vclsCode = ''' 2912 unsigned count = 0; 2913 if (srcElem1 < 0) { 2914 srcElem1 <<= 1; 2915 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 2916 count++; 2917 srcElem1 <<= 1; 2918 } 2919 } else { 2920 srcElem1 <<= 1; 2921 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 2922 count++; 2923 srcElem1 <<= 1; 2924 } 2925 } 2926 destElem = count; 2927 ''' 2928 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode) 2929 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode) 2930 2931 vclzCode = ''' 2932 unsigned count = 0; 2933 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 2934 count++; 2935 srcElem1 <<= 1; 2936 } 2937 destElem = count; 2938 ''' 2939 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode) 2940 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode) 2941 2942 vcntCode = ''' 2943 unsigned count = 0; 2944 while (srcElem1 && count < sizeof(Element) * 8) { 2945 count += srcElem1 & 0x1; 2946 srcElem1 >>= 1; 2947 } 2948 destElem = count; 2949 ''' 2950 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode) 2951 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode) 2952 2953 vmvnCode = ''' 2954 destElem = ~srcElem1; 2955 ''' 2956 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode) 2957 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode) 2958 2959 vqabsCode = ''' 2960 FPSCR fpscr = (FPSCR)Fpscr; 2961 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2962 fpscr.qc = 1; 2963 destElem = ~srcElem1; 2964 } else if (srcElem1 < 0) { 2965 destElem = -srcElem1; 2966 } else { 2967 destElem = srcElem1; 2968 } 2969 Fpscr = fpscr; 2970 ''' 2971 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode) 2972 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode) 2973 2974 vqnegCode = ''' 2975 FPSCR fpscr = (FPSCR)Fpscr; 2976 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2977 fpscr.qc = 1; 2978 
destElem = ~srcElem1; 2979 } else { 2980 destElem = -srcElem1; 2981 } 2982 Fpscr = fpscr; 2983 ''' 2984 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode) 2985 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode) 2986 2987 vabsCode = ''' 2988 if (srcElem1 < 0) { 2989 destElem = -srcElem1; 2990 } else { 2991 destElem = srcElem1; 2992 } 2993 ''' 2994 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode) 2995 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode) 2996 vabsfpCode = ''' 2997 union 2998 { 2999 uint32_t i; 3000 float f; 3001 } cStruct; 3002 cStruct.f = srcReg1; 3003 cStruct.i &= mask(sizeof(Element) * 8 - 1); 3004 destReg = cStruct.f; 3005 ''' 3006 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode) 3007 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode) 3008 3009 vnegCode = ''' 3010 destElem = -srcElem1; 3011 ''' 3012 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode) 3013 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode) 3014 vnegfpCode = ''' 3015 destReg = -srcReg1; 3016 ''' 3017 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode) 3018 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode) 3019 3020 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3021 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode) 3022 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode) 3023 vcgtfpCode = ''' 3024 FPSCR fpscr = (FPSCR)Fpscr; 3025 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, 3026 true, true, VfpRoundNearest); 3027 destReg = (res == 0) ? -1 : 0; 3028 if (res == 2.0) 3029 fpscr.ioc = 1; 3030 Fpscr = fpscr; 3031 ''' 3032 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",), 3033 2, vcgtfpCode, toInt = True) 3034 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",), 3035 4, vcgtfpCode, toInt = True) 3036 3037 vcgeCode = 'destElem = (srcElem1 >= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3038 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode) 3039 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode) 3040 vcgefpCode = ''' 3041 FPSCR fpscr = (FPSCR)Fpscr; 3042 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, 3043 true, true, VfpRoundNearest); 3044 destReg = (res == 0) ? -1 : 0; 3045 if (res == 2.0) 3046 fpscr.ioc = 1; 3047 Fpscr = fpscr; 3048 ''' 3049 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",), 3050 2, vcgefpCode, toInt = True) 3051 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",), 3052 4, vcgefpCode, toInt = True) 3053 3054 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3055 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode) 3056 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode) 3057 vceqfpCode = ''' 3058 FPSCR fpscr = (FPSCR)Fpscr; 3059 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, 3060 true, true, VfpRoundNearest); 3061 destReg = (res == 0) ? -1 : 0; 3062 if (res == 2.0) 3063 fpscr.ioc = 1; 3064 Fpscr = fpscr; 3065 ''' 3066 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",), 3067 2, vceqfpCode, toInt = True) 3068 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",), 3069 4, vceqfpCode, toInt = True) 3070 3071 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;' 3072 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode) 3073 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode) 3074 vclefpCode = ''' 3075 FPSCR fpscr = (FPSCR)Fpscr; 3076 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, 3077 true, true, VfpRoundNearest); 3078 destReg = (res == 0) ? -1 : 0; 3079 if (res == 2.0) 3080 fpscr.ioc = 1; 3081 Fpscr = fpscr; 3082 ''' 3083 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",), 3084 2, vclefpCode, toInt = True) 3085 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",), 3086 4, vclefpCode, toInt = True) 3087 3088 vcltCode = 'destElem = (srcElem1 < 0) ? 
mask(sizeof(Element) * 8) : 0;' 3089 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode) 3090 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode) 3091 vcltfpCode = ''' 3092 FPSCR fpscr = (FPSCR)Fpscr; 3093 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, 3094 true, true, VfpRoundNearest); 3095 destReg = (res == 0) ? -1 : 0; 3096 if (res == 2.0) 3097 fpscr.ioc = 1; 3098 Fpscr = fpscr; 3099 ''' 3100 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",), 3101 2, vcltfpCode, toInt = True) 3102 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",), 3103 4, vcltfpCode, toInt = True) 3104 3105 vswpCode = ''' 3106 FloatRegBits mid; 3107 for (unsigned r = 0; r < rCount; r++) { 3108 mid = srcReg1.regs[r]; 3109 srcReg1.regs[r] = destReg.regs[r]; 3110 destReg.regs[r] = mid; 3111 } 3112 ''' 3113 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode) 3114 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode) 3115 3116 vtrnCode = ''' 3117 Element mid; 3118 for (unsigned i = 0; i < eCount; i += 2) { 3119 mid = srcReg1.elements[i]; 3120 srcReg1.elements[i] = destReg.elements[i + 1]; 3121 destReg.elements[i + 1] = mid; 3122 } 3123 ''' 3124 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode) 3125 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode) 3126 3127 vuzpCode = ''' 3128 Element mid[eCount]; 3129 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3130 for (unsigned i = 0; i < eCount / 2; i++) { 3131 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3132 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3133 destReg.elements[i] = destReg.elements[2 * i]; 3134 } 3135 for (unsigned i = 0; i < eCount / 2; i++) { 3136 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3137 } 3138 ''' 3139 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode) 3140 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode) 3141 3142 vzipCode = ''' 3143 Element mid[eCount]; 3144 memcpy(&mid, &destReg, sizeof(destReg)); 3145 for 
(unsigned i = 0; i < eCount / 2; i++) { 3146 destReg.elements[2 * i] = mid[i]; 3147 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3148 } 3149 for (int i = 0; i < eCount / 2; i++) { 3150 srcReg1.elements[2 * i] = mid[eCount / 2 + i]; 3151 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3152 } 3153 ''' 3154 twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode) 3155 twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode) 3156 3157 vmovnCode = 'destElem = srcElem1;' 3158 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode) 3159 3160 vdupCode = 'destElem = srcElem1;' 3161 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode) 3162 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode) 3163 3164 def vdupGprInst(name, Name, types, rCount): 3165 global header_output, exec_output 3166 eWalkCode = ''' 3167 RegVect destReg; 3168 for (unsigned i = 0; i < eCount; i++) { 3169 destReg.elements[i] = htog((Element)Op1); 3170 } 3171 ''' 3172 for reg in range(rCount): 3173 eWalkCode += ''' 3174 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3175 ''' % { "reg" : reg } 3176 iop = InstObjParams(name, Name, 3177 "RegRegOp", 3178 { "code": eWalkCode, 3179 "r_count": rCount, 3180 "predicate_test": predicateTest }, []) 3181 header_output += NeonRegRegOpDeclare.subst(iop) 3182 exec_output += NeonEqualRegExecute.subst(iop) 3183 for type in types: 3184 substDict = { "targs" : type, 3185 "class_name" : Name } 3186 exec_output += NeonExecDeclare.subst(substDict) 3187 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2) 3188 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4) 3189 3190 vmovCode = 'destElem = imm;' 3191 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode) 3192 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode) 3193 3194 vorrCode = 'destElem |= imm;' 3195 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True) 3196 oneRegImmInst("vorr", "NVorriQ", 
("uint64_t",), 4, vorrCode, True) 3197 3198 vmvnCode = 'destElem = ~imm;' 3199 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode) 3200 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode) 3201 3202 vbicCode = 'destElem &= ~imm;' 3203 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True) 3204 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True) 3205 3206 vqmovnCode = ''' 3207 FPSCR fpscr = (FPSCR)Fpscr; 3208 destElem = srcElem1; 3209 if ((BigElement)destElem != srcElem1) { 3210 fpscr.qc = 1; 3211 destElem = mask(sizeof(Element) * 8 - 1); 3212 if (srcElem1 < 0) 3213 destElem = ~destElem; 3214 } 3215 Fpscr = fpscr; 3216 ''' 3217 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode) 3218 3219 vqmovunCode = ''' 3220 FPSCR fpscr = (FPSCR)Fpscr; 3221 destElem = srcElem1; 3222 if ((BigElement)destElem != srcElem1) { 3223 fpscr.qc = 1; 3224 destElem = mask(sizeof(Element) * 8); 3225 } 3226 Fpscr = fpscr; 3227 ''' 3228 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3229 smallUnsignedTypes, vqmovunCode) 3230 3231 vqmovunsCode = ''' 3232 FPSCR fpscr = (FPSCR)Fpscr; 3233 destElem = srcElem1; 3234 if (srcElem1 < 0 || 3235 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3236 fpscr.qc = 1; 3237 destElem = mask(sizeof(Element) * 8); 3238 if (srcElem1 < 0) 3239 destElem = ~destElem; 3240 } 3241 Fpscr = fpscr; 3242 ''' 3243 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3244 smallSignedTypes, vqmovunsCode) 3245 3246 def buildVext(name, Name, types, rCount, op): 3247 global header_output, exec_output 3248 eWalkCode = ''' 3249 RegVect srcReg1, srcReg2, destReg; 3250 ''' 3251 for reg in range(rCount): 3252 eWalkCode += ''' 3253 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 3254 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 3255 ''' % { "reg" : reg } 3256 eWalkCode += op 3257 for reg in range(rCount): 3258 eWalkCode += ''' 3259 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3260 ''' % { "reg" : reg } 3261 
iop = InstObjParams(name, Name, 3262 "RegRegRegImmOp", 3263 { "code": eWalkCode, 3264 "r_count": rCount, 3265 "predicate_test": predicateTest }, []) 3266 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3267 exec_output += NeonEqualRegExecute.subst(iop) 3268 for type in types: 3269 substDict = { "targs" : type, 3270 "class_name" : Name } 3271 exec_output += NeonExecDeclare.subst(substDict) 3272 3273 vextCode = ''' 3274 for (unsigned i = 0; i < eCount; i++) { 3275 unsigned index = i + imm; 3276 if (index < eCount) { 3277 destReg.elements[i] = srcReg1.elements[index]; 3278 } else { 3279 index -= eCount; 3280 assert(index < eCount); 3281 destReg.elements[i] = srcReg2.elements[index]; 3282 } 3283 } 3284 ''' 3285 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode) 3286 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode) 3287 3288 def buildVtbxl(name, Name, length, isVtbl): 3289 global header_output, decoder_output, exec_output 3290 code = ''' 3291 union 3292 { 3293 uint8_t bytes[32]; 3294 FloatRegBits regs[8]; 3295 } table; 3296 3297 union 3298 { 3299 uint8_t bytes[8]; 3300 FloatRegBits regs[2]; 3301 } destReg, srcReg2; 3302 3303 const unsigned length = %(length)d; 3304 const bool isVtbl = %(isVtbl)s; 3305 3306 srcReg2.regs[0] = htog(FpOp2P0.uw); 3307 srcReg2.regs[1] = htog(FpOp2P1.uw); 3308 3309 destReg.regs[0] = htog(FpDestP0.uw); 3310 destReg.regs[1] = htog(FpDestP1.uw); 3311 ''' % { "length" : length, "isVtbl" : isVtbl } 3312 for reg in range(8): 3313 if reg < length * 2: 3314 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \ 3315 { "reg" : reg } 3316 else: 3317 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3318 code += ''' 3319 for (unsigned i = 0; i < sizeof(destReg); i++) { 3320 uint8_t index = srcReg2.bytes[i]; 3321 if (index < 8 * length) { 3322 destReg.bytes[i] = table.bytes[index]; 3323 } else { 3324 if (isVtbl) 3325 destReg.bytes[i] = 0; 3326 // else destReg.bytes[i] unchanged 3327 } 3328 } 3329 3330 FpDestP0.uw = 
gtoh(destReg.regs[0]); 3331 FpDestP1.uw = gtoh(destReg.regs[1]); 3332 ''' 3333 iop = InstObjParams(name, Name, 3334 "RegRegRegOp", 3335 { "code": code, 3336 "predicate_test": predicateTest }, []) 3337 header_output += RegRegRegOpDeclare.subst(iop) 3338 decoder_output += RegRegRegOpConstructor.subst(iop) 3339 exec_output += PredOpExecute.subst(iop) 3340 3341 buildVtbxl("vtbl", "NVtbl1", 1, "true") 3342 buildVtbxl("vtbl", "NVtbl2", 2, "true") 3343 buildVtbxl("vtbl", "NVtbl3", 3, "true") 3344 buildVtbxl("vtbl", "NVtbl4", 4, "true") 3345 3346 buildVtbxl("vtbx", "NVtbx1", 1, "false") 3347 buildVtbxl("vtbx", "NVtbx2", 2, "false") 3348 buildVtbxl("vtbx", "NVtbx3", 3, "false") 3349 buildVtbxl("vtbx", "NVtbx4", 4, "false") 3350}}; 3351