neon.isa revision 9557
// -*- mode:c++ -*-

// Copyright (c) 2010 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Gabe Black 39 40output header {{ 41 template <template <typename T> class Base> 42 StaticInstPtr 43 decodeNeonUThreeUReg(unsigned size, 44 ExtMachInst machInst, IntRegIndex dest, 45 IntRegIndex op1, IntRegIndex op2) 46 { 47 switch (size) { 48 case 0: 49 return new Base<uint8_t>(machInst, dest, op1, op2); 50 case 1: 51 return new Base<uint16_t>(machInst, dest, op1, op2); 52 case 2: 53 return new Base<uint32_t>(machInst, dest, op1, op2); 54 case 3: 55 return new Base<uint64_t>(machInst, dest, op1, op2); 56 default: 57 return new Unknown(machInst); 58 } 59 } 60 61 template <template <typename T> class Base> 62 StaticInstPtr 63 decodeNeonSThreeUReg(unsigned size, 64 ExtMachInst machInst, IntRegIndex dest, 65 IntRegIndex op1, IntRegIndex op2) 66 { 67 switch (size) { 68 case 0: 69 return new Base<int8_t>(machInst, dest, op1, op2); 70 case 1: 71 return new Base<int16_t>(machInst, dest, op1, op2); 72 case 2: 73 return new Base<int32_t>(machInst, dest, op1, op2); 74 case 3: 75 return new Base<int64_t>(machInst, dest, op1, op2); 76 default: 77 return new Unknown(machInst); 78 } 79 } 80 81 template <template <typename T> class Base> 82 StaticInstPtr 83 decodeNeonUSThreeUReg(bool notSigned, unsigned size, 84 ExtMachInst machInst, IntRegIndex dest, 85 IntRegIndex op1, IntRegIndex op2) 86 { 87 if (notSigned) { 88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); 89 } else { 90 return 
decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); 91 } 92 } 93 94 template <template <typename T> class Base> 95 StaticInstPtr 96 decodeNeonUThreeUSReg(unsigned size, 97 ExtMachInst machInst, IntRegIndex dest, 98 IntRegIndex op1, IntRegIndex op2) 99 { 100 switch (size) { 101 case 0: 102 return new Base<uint8_t>(machInst, dest, op1, op2); 103 case 1: 104 return new Base<uint16_t>(machInst, dest, op1, op2); 105 case 2: 106 return new Base<uint32_t>(machInst, dest, op1, op2); 107 default: 108 return new Unknown(machInst); 109 } 110 } 111 112 template <template <typename T> class Base> 113 StaticInstPtr 114 decodeNeonSThreeUSReg(unsigned size, 115 ExtMachInst machInst, IntRegIndex dest, 116 IntRegIndex op1, IntRegIndex op2) 117 { 118 switch (size) { 119 case 0: 120 return new Base<int8_t>(machInst, dest, op1, op2); 121 case 1: 122 return new Base<int16_t>(machInst, dest, op1, op2); 123 case 2: 124 return new Base<int32_t>(machInst, dest, op1, op2); 125 default: 126 return new Unknown(machInst); 127 } 128 } 129 130 template <template <typename T> class Base> 131 StaticInstPtr 132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size, 133 ExtMachInst machInst, IntRegIndex dest, 134 IntRegIndex op1, IntRegIndex op2) 135 { 136 if (notSigned) { 137 return decodeNeonUThreeUSReg<Base>( 138 size, machInst, dest, op1, op2); 139 } else { 140 return decodeNeonSThreeUSReg<Base>( 141 size, machInst, dest, op1, op2); 142 } 143 } 144 145 template <template <typename T> class BaseD, 146 template <typename T> class BaseQ> 147 StaticInstPtr 148 decodeNeonUThreeSReg(bool q, unsigned size, 149 ExtMachInst machInst, IntRegIndex dest, 150 IntRegIndex op1, IntRegIndex op2) 151 { 152 if (q) { 153 return decodeNeonUThreeUSReg<BaseQ>( 154 size, machInst, dest, op1, op2); 155 } else { 156 return decodeNeonUThreeUSReg<BaseD>( 157 size, machInst, dest, op1, op2); 158 } 159 } 160 161 template <template <typename T> class BaseD, 162 template <typename T> class BaseQ> 163 StaticInstPtr 164 
decodeNeonSThreeSReg(bool q, unsigned size, 165 ExtMachInst machInst, IntRegIndex dest, 166 IntRegIndex op1, IntRegIndex op2) 167 { 168 if (q) { 169 return decodeNeonSThreeUSReg<BaseQ>( 170 size, machInst, dest, op1, op2); 171 } else { 172 return decodeNeonSThreeUSReg<BaseD>( 173 size, machInst, dest, op1, op2); 174 } 175 } 176 177 template <template <typename T> class BaseD, 178 template <typename T> class BaseQ> 179 StaticInstPtr 180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, 181 ExtMachInst machInst, IntRegIndex dest, 182 IntRegIndex op1, IntRegIndex op2) 183 { 184 if (notSigned) { 185 return decodeNeonUThreeSReg<BaseD, BaseQ>( 186 q, size, machInst, dest, op1, op2); 187 } else { 188 return decodeNeonSThreeSReg<BaseD, BaseQ>( 189 q, size, machInst, dest, op1, op2); 190 } 191 } 192 193 template <template <typename T> class BaseD, 194 template <typename T> class BaseQ> 195 StaticInstPtr 196 decodeNeonUThreeReg(bool q, unsigned size, 197 ExtMachInst machInst, IntRegIndex dest, 198 IntRegIndex op1, IntRegIndex op2) 199 { 200 if (q) { 201 return decodeNeonUThreeUReg<BaseQ>( 202 size, machInst, dest, op1, op2); 203 } else { 204 return decodeNeonUThreeUReg<BaseD>( 205 size, machInst, dest, op1, op2); 206 } 207 } 208 209 template <template <typename T> class BaseD, 210 template <typename T> class BaseQ> 211 StaticInstPtr 212 decodeNeonSThreeReg(bool q, unsigned size, 213 ExtMachInst machInst, IntRegIndex dest, 214 IntRegIndex op1, IntRegIndex op2) 215 { 216 if (q) { 217 return decodeNeonSThreeUReg<BaseQ>( 218 size, machInst, dest, op1, op2); 219 } else { 220 return decodeNeonSThreeUReg<BaseD>( 221 size, machInst, dest, op1, op2); 222 } 223 } 224 225 template <template <typename T> class BaseD, 226 template <typename T> class BaseQ> 227 StaticInstPtr 228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, 229 ExtMachInst machInst, IntRegIndex dest, 230 IntRegIndex op1, IntRegIndex op2) 231 { 232 if (notSigned) { 233 return 
decodeNeonUThreeReg<BaseD, BaseQ>( 234 q, size, machInst, dest, op1, op2); 235 } else { 236 return decodeNeonSThreeReg<BaseD, BaseQ>( 237 q, size, machInst, dest, op1, op2); 238 } 239 } 240 241 template <template <typename T> class BaseD, 242 template <typename T> class BaseQ> 243 StaticInstPtr 244 decodeNeonUTwoShiftReg(bool q, unsigned size, 245 ExtMachInst machInst, IntRegIndex dest, 246 IntRegIndex op1, uint64_t imm) 247 { 248 if (q) { 249 switch (size) { 250 case 0: 251 return new BaseQ<uint8_t>(machInst, dest, op1, imm); 252 case 1: 253 return new BaseQ<uint16_t>(machInst, dest, op1, imm); 254 case 2: 255 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 256 case 3: 257 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 258 default: 259 return new Unknown(machInst); 260 } 261 } else { 262 switch (size) { 263 case 0: 264 return new BaseD<uint8_t>(machInst, dest, op1, imm); 265 case 1: 266 return new BaseD<uint16_t>(machInst, dest, op1, imm); 267 case 2: 268 return new BaseD<uint32_t>(machInst, dest, op1, imm); 269 case 3: 270 return new BaseD<uint64_t>(machInst, dest, op1, imm); 271 default: 272 return new Unknown(machInst); 273 } 274 } 275 } 276 277 template <template <typename T> class BaseD, 278 template <typename T> class BaseQ> 279 StaticInstPtr 280 decodeNeonSTwoShiftReg(bool q, unsigned size, 281 ExtMachInst machInst, IntRegIndex dest, 282 IntRegIndex op1, uint64_t imm) 283 { 284 if (q) { 285 switch (size) { 286 case 0: 287 return new BaseQ<int8_t>(machInst, dest, op1, imm); 288 case 1: 289 return new BaseQ<int16_t>(machInst, dest, op1, imm); 290 case 2: 291 return new BaseQ<int32_t>(machInst, dest, op1, imm); 292 case 3: 293 return new BaseQ<int64_t>(machInst, dest, op1, imm); 294 default: 295 return new Unknown(machInst); 296 } 297 } else { 298 switch (size) { 299 case 0: 300 return new BaseD<int8_t>(machInst, dest, op1, imm); 301 case 1: 302 return new BaseD<int16_t>(machInst, dest, op1, imm); 303 case 2: 304 return new 
BaseD<int32_t>(machInst, dest, op1, imm); 305 case 3: 306 return new BaseD<int64_t>(machInst, dest, op1, imm); 307 default: 308 return new Unknown(machInst); 309 } 310 } 311 } 312 313 314 template <template <typename T> class BaseD, 315 template <typename T> class BaseQ> 316 StaticInstPtr 317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, 318 ExtMachInst machInst, IntRegIndex dest, 319 IntRegIndex op1, uint64_t imm) 320 { 321 if (notSigned) { 322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>( 323 q, size, machInst, dest, op1, imm); 324 } else { 325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>( 326 q, size, machInst, dest, op1, imm); 327 } 328 } 329 330 template <template <typename T> class Base> 331 StaticInstPtr 332 decodeNeonUTwoShiftUSReg(unsigned size, 333 ExtMachInst machInst, IntRegIndex dest, 334 IntRegIndex op1, uint64_t imm) 335 { 336 switch (size) { 337 case 0: 338 return new Base<uint8_t>(machInst, dest, op1, imm); 339 case 1: 340 return new Base<uint16_t>(machInst, dest, op1, imm); 341 case 2: 342 return new Base<uint32_t>(machInst, dest, op1, imm); 343 default: 344 return new Unknown(machInst); 345 } 346 } 347 348 template <template <typename T> class BaseD, 349 template <typename T> class BaseQ> 350 StaticInstPtr 351 decodeNeonUTwoShiftSReg(bool q, unsigned size, 352 ExtMachInst machInst, IntRegIndex dest, 353 IntRegIndex op1, uint64_t imm) 354 { 355 if (q) { 356 return decodeNeonUTwoShiftUSReg<BaseQ>( 357 size, machInst, dest, op1, imm); 358 } else { 359 return decodeNeonUTwoShiftUSReg<BaseD>( 360 size, machInst, dest, op1, imm); 361 } 362 } 363 364 template <template <typename T> class Base> 365 StaticInstPtr 366 decodeNeonSTwoShiftUSReg(unsigned size, 367 ExtMachInst machInst, IntRegIndex dest, 368 IntRegIndex op1, uint64_t imm) 369 { 370 switch (size) { 371 case 0: 372 return new Base<int8_t>(machInst, dest, op1, imm); 373 case 1: 374 return new Base<int16_t>(machInst, dest, op1, imm); 375 case 2: 376 return new 
Base<int32_t>(machInst, dest, op1, imm); 377 default: 378 return new Unknown(machInst); 379 } 380 } 381 382 template <template <typename T> class BaseD, 383 template <typename T> class BaseQ> 384 StaticInstPtr 385 decodeNeonSTwoShiftSReg(bool q, unsigned size, 386 ExtMachInst machInst, IntRegIndex dest, 387 IntRegIndex op1, uint64_t imm) 388 { 389 if (q) { 390 return decodeNeonSTwoShiftUSReg<BaseQ>( 391 size, machInst, dest, op1, imm); 392 } else { 393 return decodeNeonSTwoShiftUSReg<BaseD>( 394 size, machInst, dest, op1, imm); 395 } 396 } 397 398 template <template <typename T> class BaseD, 399 template <typename T> class BaseQ> 400 StaticInstPtr 401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, 402 ExtMachInst machInst, IntRegIndex dest, 403 IntRegIndex op1, uint64_t imm) 404 { 405 if (notSigned) { 406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 407 q, size, machInst, dest, op1, imm); 408 } else { 409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 410 q, size, machInst, dest, op1, imm); 411 } 412 } 413 414 template <template <typename T> class Base> 415 StaticInstPtr 416 decodeNeonUTwoMiscUSReg(unsigned size, 417 ExtMachInst machInst, IntRegIndex dest, 418 IntRegIndex op1) 419 { 420 switch (size) { 421 case 0: 422 return new Base<uint8_t>(machInst, dest, op1); 423 case 1: 424 return new Base<uint16_t>(machInst, dest, op1); 425 case 2: 426 return new Base<uint32_t>(machInst, dest, op1); 427 default: 428 return new Unknown(machInst); 429 } 430 } 431 432 template <template <typename T> class Base> 433 StaticInstPtr 434 decodeNeonSTwoMiscUSReg(unsigned size, 435 ExtMachInst machInst, IntRegIndex dest, 436 IntRegIndex op1) 437 { 438 switch (size) { 439 case 0: 440 return new Base<int8_t>(machInst, dest, op1); 441 case 1: 442 return new Base<int16_t>(machInst, dest, op1); 443 case 2: 444 return new Base<int32_t>(machInst, dest, op1); 445 default: 446 return new Unknown(machInst); 447 } 448 } 449 450 template <template <typename T> class BaseD, 451 
template <typename T> class BaseQ> 452 StaticInstPtr 453 decodeNeonUTwoMiscSReg(bool q, unsigned size, 454 ExtMachInst machInst, IntRegIndex dest, 455 IntRegIndex op1) 456 { 457 if (q) { 458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 459 } else { 460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 461 } 462 } 463 464 template <template <typename T> class BaseD, 465 template <typename T> class BaseQ> 466 StaticInstPtr 467 decodeNeonSTwoMiscSReg(bool q, unsigned size, 468 ExtMachInst machInst, IntRegIndex dest, 469 IntRegIndex op1) 470 { 471 if (q) { 472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 473 } else { 474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 475 } 476 } 477 478 template <template <typename T> class Base> 479 StaticInstPtr 480 decodeNeonUTwoMiscUReg(unsigned size, 481 ExtMachInst machInst, IntRegIndex dest, 482 IntRegIndex op1) 483 { 484 switch (size) { 485 case 0: 486 return new Base<uint8_t>(machInst, dest, op1); 487 case 1: 488 return new Base<uint16_t>(machInst, dest, op1); 489 case 2: 490 return new Base<uint32_t>(machInst, dest, op1); 491 case 3: 492 return new Base<uint64_t>(machInst, dest, op1); 493 default: 494 return new Unknown(machInst); 495 } 496 } 497 498 template <template <typename T> class Base> 499 StaticInstPtr 500 decodeNeonSTwoMiscUReg(unsigned size, 501 ExtMachInst machInst, IntRegIndex dest, 502 IntRegIndex op1) 503 { 504 switch (size) { 505 case 0: 506 return new Base<int8_t>(machInst, dest, op1); 507 case 1: 508 return new Base<int16_t>(machInst, dest, op1); 509 case 2: 510 return new Base<int32_t>(machInst, dest, op1); 511 case 3: 512 return new Base<int64_t>(machInst, dest, op1); 513 default: 514 return new Unknown(machInst); 515 } 516 } 517 518 template <template <typename T> class BaseD, 519 template <typename T> class BaseQ> 520 StaticInstPtr 521 decodeNeonSTwoMiscReg(bool q, unsigned size, 522 ExtMachInst machInst, IntRegIndex dest, 523 
IntRegIndex op1) 524 { 525 if (q) { 526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 527 } else { 528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); 529 } 530 } 531 532 template <template <typename T> class BaseD, 533 template <typename T> class BaseQ> 534 StaticInstPtr 535 decodeNeonUTwoMiscReg(bool q, unsigned size, 536 ExtMachInst machInst, IntRegIndex dest, 537 IntRegIndex op1) 538 { 539 if (q) { 540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 541 } else { 542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); 543 } 544 } 545 546 template <template <typename T> class BaseD, 547 template <typename T> class BaseQ> 548 StaticInstPtr 549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, 550 ExtMachInst machInst, IntRegIndex dest, 551 IntRegIndex op1) 552 { 553 if (notSigned) { 554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 555 q, size, machInst, dest, op1); 556 } else { 557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 558 q, size, machInst, dest, op1); 559 } 560 } 561 562}}; 563 564output exec {{ 565 static float 566 vcgtFunc(float op1, float op2) 567 { 568 if (std::isnan(op1) || std::isnan(op2)) 569 return 2.0; 570 return (op1 > op2) ? 0.0 : 1.0; 571 } 572 573 static float 574 vcgeFunc(float op1, float op2) 575 { 576 if (std::isnan(op1) || std::isnan(op2)) 577 return 2.0; 578 return (op1 >= op2) ? 0.0 : 1.0; 579 } 580 581 static float 582 vceqFunc(float op1, float op2) 583 { 584 if (isSnan(op1) || isSnan(op2)) 585 return 2.0; 586 return (op1 == op2) ? 0.0 : 1.0; 587 } 588 589 static float 590 vcleFunc(float op1, float op2) 591 { 592 if (std::isnan(op1) || std::isnan(op2)) 593 return 2.0; 594 return (op1 <= op2) ? 0.0 : 1.0; 595 } 596 597 static float 598 vcltFunc(float op1, float op2) 599 { 600 if (std::isnan(op1) || std::isnan(op2)) 601 return 2.0; 602 return (op1 < op2) ? 
0.0 : 1.0; 603 } 604 605 static float 606 vacgtFunc(float op1, float op2) 607 { 608 if (std::isnan(op1) || std::isnan(op2)) 609 return 2.0; 610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0; 611 } 612 613 static float 614 vacgeFunc(float op1, float op2) 615 { 616 if (std::isnan(op1) || std::isnan(op2)) 617 return 2.0; 618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0; 619 } 620}}; 621 622let {{ 623 624 header_output = "" 625 exec_output = "" 626 627 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t") 628 unsignedTypes = smallUnsignedTypes + ("uint64_t",) 629 smallSignedTypes = ("int8_t", "int16_t", "int32_t") 630 signedTypes = smallSignedTypes + ("int64_t",) 631 smallTypes = smallUnsignedTypes + smallSignedTypes 632 allTypes = unsignedTypes + signedTypes 633 634 def threeEqualRegInst(name, Name, opClass, types, rCount, op, 635 readDest=False, pairwise=False): 636 global header_output, exec_output 637 eWalkCode = simdEnabledCheckCode + ''' 638 RegVect srcReg1, srcReg2, destReg; 639 ''' 640 for reg in range(rCount): 641 eWalkCode += ''' 642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 644 ''' % { "reg" : reg } 645 if readDest: 646 eWalkCode += ''' 647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 648 ''' % { "reg" : reg } 649 readDestCode = '' 650 if readDest: 651 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 652 if pairwise: 653 eWalkCode += ''' 654 for (unsigned i = 0; i < eCount; i++) { 655 Element srcElem1 = gtoh(2 * i < eCount ? 656 srcReg1.elements[2 * i] : 657 srcReg2.elements[2 * i - eCount]); 658 Element srcElem2 = gtoh(2 * i < eCount ? 
659 srcReg1.elements[2 * i + 1] : 660 srcReg2.elements[2 * i + 1 - eCount]); 661 Element destElem; 662 %(readDest)s 663 %(op)s 664 destReg.elements[i] = htog(destElem); 665 } 666 ''' % { "op" : op, "readDest" : readDestCode } 667 else: 668 eWalkCode += ''' 669 for (unsigned i = 0; i < eCount; i++) { 670 Element srcElem1 = gtoh(srcReg1.elements[i]); 671 Element srcElem2 = gtoh(srcReg2.elements[i]); 672 Element destElem; 673 %(readDest)s 674 %(op)s 675 destReg.elements[i] = htog(destElem); 676 } 677 ''' % { "op" : op, "readDest" : readDestCode } 678 for reg in range(rCount): 679 eWalkCode += ''' 680 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 681 ''' % { "reg" : reg } 682 iop = InstObjParams(name, Name, 683 "RegRegRegOp", 684 { "code": eWalkCode, 685 "r_count": rCount, 686 "predicate_test": predicateTest, 687 "op_class": opClass }, []) 688 header_output += NeonRegRegRegOpDeclare.subst(iop) 689 exec_output += NeonEqualRegExecute.subst(iop) 690 for type in types: 691 substDict = { "targs" : type, 692 "class_name" : Name } 693 exec_output += NeonExecDeclare.subst(substDict) 694 695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op, 696 readDest=False, pairwise=False, toInt=False): 697 global header_output, exec_output 698 eWalkCode = simdEnabledCheckCode + ''' 699 typedef FloatReg FloatVect[rCount]; 700 FloatVect srcRegs1, srcRegs2; 701 ''' 702 if toInt: 703 eWalkCode += 'RegVect destRegs;\n' 704 else: 705 eWalkCode += 'FloatVect destRegs;\n' 706 for reg in range(rCount): 707 eWalkCode += ''' 708 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 709 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 710 ''' % { "reg" : reg } 711 if readDest: 712 if toInt: 713 eWalkCode += ''' 714 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 715 ''' % { "reg" : reg } 716 else: 717 eWalkCode += ''' 718 destRegs[%(reg)d] = FpDestP%(reg)d; 719 ''' % { "reg" : reg } 720 readDestCode = '' 721 if readDest: 722 readDestCode = 'destReg = destRegs[r];' 723 destType = 'FloatReg' 724 writeDest = 'destRegs[r] = 
destReg;' 725 if toInt: 726 destType = 'FloatRegBits' 727 writeDest = 'destRegs.regs[r] = destReg;' 728 if pairwise: 729 eWalkCode += ''' 730 for (unsigned r = 0; r < rCount; r++) { 731 FloatReg srcReg1 = (2 * r < rCount) ? 732 srcRegs1[2 * r] : srcRegs2[2 * r - rCount]; 733 FloatReg srcReg2 = (2 * r < rCount) ? 734 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount]; 735 %(destType)s destReg; 736 %(readDest)s 737 %(op)s 738 %(writeDest)s 739 } 740 ''' % { "op" : op, 741 "readDest" : readDestCode, 742 "destType" : destType, 743 "writeDest" : writeDest } 744 else: 745 eWalkCode += ''' 746 for (unsigned r = 0; r < rCount; r++) { 747 FloatReg srcReg1 = srcRegs1[r]; 748 FloatReg srcReg2 = srcRegs2[r]; 749 %(destType)s destReg; 750 %(readDest)s 751 %(op)s 752 %(writeDest)s 753 } 754 ''' % { "op" : op, 755 "readDest" : readDestCode, 756 "destType" : destType, 757 "writeDest" : writeDest } 758 for reg in range(rCount): 759 if toInt: 760 eWalkCode += ''' 761 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 762 ''' % { "reg" : reg } 763 else: 764 eWalkCode += ''' 765 FpDestP%(reg)d = destRegs[%(reg)d]; 766 ''' % { "reg" : reg } 767 iop = InstObjParams(name, Name, 768 "FpRegRegRegOp", 769 { "code": eWalkCode, 770 "r_count": rCount, 771 "predicate_test": predicateTest, 772 "op_class": opClass }, []) 773 header_output += NeonRegRegRegOpDeclare.subst(iop) 774 exec_output += NeonEqualRegExecute.subst(iop) 775 for type in types: 776 substDict = { "targs" : type, 777 "class_name" : Name } 778 exec_output += NeonExecDeclare.subst(substDict) 779 780 def threeUnequalRegInst(name, Name, opClass, types, op, 781 bigSrc1, bigSrc2, bigDest, readDest): 782 global header_output, exec_output 783 src1Cnt = src2Cnt = destCnt = 2 784 src1Prefix = src2Prefix = destPrefix = '' 785 if bigSrc1: 786 src1Cnt = 4 787 src1Prefix = 'Big' 788 if bigSrc2: 789 src2Cnt = 4 790 src2Prefix = 'Big' 791 if bigDest: 792 destCnt = 4 793 destPrefix = 'Big' 794 eWalkCode = simdEnabledCheckCode + ''' 795 %sRegVect 
srcReg1; 796 %sRegVect srcReg2; 797 %sRegVect destReg; 798 ''' % (src1Prefix, src2Prefix, destPrefix) 799 for reg in range(src1Cnt): 800 eWalkCode += ''' 801 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 802 ''' % { "reg" : reg } 803 for reg in range(src2Cnt): 804 eWalkCode += ''' 805 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 806 ''' % { "reg" : reg } 807 if readDest: 808 for reg in range(destCnt): 809 eWalkCode += ''' 810 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 811 ''' % { "reg" : reg } 812 readDestCode = '' 813 if readDest: 814 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 815 eWalkCode += ''' 816 for (unsigned i = 0; i < eCount; i++) { 817 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 818 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]); 819 %(destPrefix)sElement destElem; 820 %(readDest)s 821 %(op)s 822 destReg.elements[i] = htog(destElem); 823 } 824 ''' % { "op" : op, "readDest" : readDestCode, 825 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 826 "destPrefix" : destPrefix } 827 for reg in range(destCnt): 828 eWalkCode += ''' 829 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 830 ''' % { "reg" : reg } 831 iop = InstObjParams(name, Name, 832 "RegRegRegOp", 833 { "code": eWalkCode, 834 "r_count": 2, 835 "predicate_test": predicateTest, 836 "op_class": opClass }, []) 837 header_output += NeonRegRegRegOpDeclare.subst(iop) 838 exec_output += NeonUnequalRegExecute.subst(iop) 839 for type in types: 840 substDict = { "targs" : type, 841 "class_name" : Name } 842 exec_output += NeonExecDeclare.subst(substDict) 843 844 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False): 845 threeUnequalRegInst(name, Name, opClass, types, op, 846 True, True, False, readDest) 847 848 def threeRegLongInst(name, Name, opClass, types, op, readDest=False): 849 threeUnequalRegInst(name, Name, opClass, types, op, 850 False, False, True, readDest) 851 852 def threeRegWideInst(name, Name, opClass, types, op, 
readDest=False): 853 threeUnequalRegInst(name, Name, opClass, types, op, 854 True, False, True, readDest) 855 856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False): 857 global header_output, exec_output 858 eWalkCode = simdEnabledCheckCode + ''' 859 RegVect srcReg1, srcReg2, destReg; 860 ''' 861 for reg in range(rCount): 862 eWalkCode += ''' 863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 865 ''' % { "reg" : reg } 866 if readDest: 867 eWalkCode += ''' 868 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 869 ''' % { "reg" : reg } 870 readDestCode = '' 871 if readDest: 872 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 873 eWalkCode += ''' 874 if (imm < 0 && imm >= eCount) { 875 if (FullSystem) 876 fault = new UndefinedInstruction; 877 else 878 fault = new UndefinedInstruction(false, mnemonic); 879 } else { 880 for (unsigned i = 0; i < eCount; i++) { 881 Element srcElem1 = gtoh(srcReg1.elements[i]); 882 Element srcElem2 = gtoh(srcReg2.elements[imm]); 883 Element destElem; 884 %(readDest)s 885 %(op)s 886 destReg.elements[i] = htog(destElem); 887 } 888 } 889 ''' % { "op" : op, "readDest" : readDestCode } 890 for reg in range(rCount): 891 eWalkCode += ''' 892 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 893 ''' % { "reg" : reg } 894 iop = InstObjParams(name, Name, 895 "RegRegRegImmOp", 896 { "code": eWalkCode, 897 "r_count": rCount, 898 "predicate_test": predicateTest, 899 "op_class": opClass }, []) 900 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 901 exec_output += NeonEqualRegExecute.subst(iop) 902 for type in types: 903 substDict = { "targs" : type, 904 "class_name" : Name } 905 exec_output += NeonExecDeclare.subst(substDict) 906 907 def twoRegLongInst(name, Name, opClass, types, op, readDest=False): 908 global header_output, exec_output 909 rCount = 2 910 eWalkCode = simdEnabledCheckCode + ''' 911 RegVect srcReg1, srcReg2; 912 BigRegVect destReg; 913 ''' 914 for 
reg in range(rCount): 915 eWalkCode += ''' 916 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 917 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);; 918 ''' % { "reg" : reg } 919 if readDest: 920 for reg in range(2 * rCount): 921 eWalkCode += ''' 922 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 923 ''' % { "reg" : reg } 924 readDestCode = '' 925 if readDest: 926 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 927 eWalkCode += ''' 928 if (imm < 0 && imm >= eCount) { 929 if (FullSystem) 930 fault = new UndefinedInstruction; 931 else 932 fault = new UndefinedInstruction(false, mnemonic); 933 } else { 934 for (unsigned i = 0; i < eCount; i++) { 935 Element srcElem1 = gtoh(srcReg1.elements[i]); 936 Element srcElem2 = gtoh(srcReg2.elements[imm]); 937 BigElement destElem; 938 %(readDest)s 939 %(op)s 940 destReg.elements[i] = htog(destElem); 941 } 942 } 943 ''' % { "op" : op, "readDest" : readDestCode } 944 for reg in range(2 * rCount): 945 eWalkCode += ''' 946 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 947 ''' % { "reg" : reg } 948 iop = InstObjParams(name, Name, 949 "RegRegRegImmOp", 950 { "code": eWalkCode, 951 "r_count": rCount, 952 "predicate_test": predicateTest, 953 "op_class": opClass }, []) 954 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 955 exec_output += NeonUnequalRegExecute.subst(iop) 956 for type in types: 957 substDict = { "targs" : type, 958 "class_name" : Name } 959 exec_output += NeonExecDeclare.subst(substDict) 960 961 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False): 962 global header_output, exec_output 963 eWalkCode = simdEnabledCheckCode + ''' 964 typedef FloatReg FloatVect[rCount]; 965 FloatVect srcRegs1, srcRegs2, destRegs; 966 ''' 967 for reg in range(rCount): 968 eWalkCode += ''' 969 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 970 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 971 ''' % { "reg" : reg } 972 if readDest: 973 eWalkCode += ''' 974 destRegs[%(reg)d] = FpDestP%(reg)d; 975 ''' % { "reg" : reg } 976 
readDestCode = '' 977 if readDest: 978 readDestCode = 'destReg = destRegs[i];' 979 eWalkCode += ''' 980 if (imm < 0 && imm >= eCount) { 981 if (FullSystem) 982 fault = new UndefinedInstruction; 983 else 984 fault = new UndefinedInstruction(false, mnemonic); 985 } else { 986 for (unsigned i = 0; i < rCount; i++) { 987 FloatReg srcReg1 = srcRegs1[i]; 988 FloatReg srcReg2 = srcRegs2[imm]; 989 FloatReg destReg; 990 %(readDest)s 991 %(op)s 992 destRegs[i] = destReg; 993 } 994 } 995 ''' % { "op" : op, "readDest" : readDestCode } 996 for reg in range(rCount): 997 eWalkCode += ''' 998 FpDestP%(reg)d = destRegs[%(reg)d]; 999 ''' % { "reg" : reg } 1000 iop = InstObjParams(name, Name, 1001 "FpRegRegRegImmOp", 1002 { "code": eWalkCode, 1003 "r_count": rCount, 1004 "predicate_test": predicateTest, 1005 "op_class": opClass }, []) 1006 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1007 exec_output += NeonEqualRegExecute.subst(iop) 1008 for type in types: 1009 substDict = { "targs" : type, 1010 "class_name" : Name } 1011 exec_output += NeonExecDeclare.subst(substDict) 1012 1013 def twoRegShiftInst(name, Name, opClass, types, rCount, op, 1014 readDest=False, toInt=False, fromInt=False): 1015 global header_output, exec_output 1016 eWalkCode = simdEnabledCheckCode + ''' 1017 RegVect srcRegs1, destRegs; 1018 ''' 1019 for reg in range(rCount): 1020 eWalkCode += ''' 1021 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1022 ''' % { "reg" : reg } 1023 if readDest: 1024 eWalkCode += ''' 1025 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1026 ''' % { "reg" : reg } 1027 readDestCode = '' 1028 if readDest: 1029 readDestCode = 'destElem = gtoh(destRegs.elements[i]);' 1030 if toInt: 1031 readDestCode = 'destReg = gtoh(destRegs.regs[i]);' 1032 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);' 1033 if fromInt: 1034 readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);' 1035 declDest = 'Element destElem;' 1036 writeDestCode = 'destRegs.elements[i] = htog(destElem);' 
1037 if toInt: 1038 declDest = 'FloatRegBits destReg;' 1039 writeDestCode = 'destRegs.regs[i] = htog(destReg);' 1040 eWalkCode += ''' 1041 for (unsigned i = 0; i < eCount; i++) { 1042 %(readOp)s 1043 %(declDest)s 1044 %(readDest)s 1045 %(op)s 1046 %(writeDest)s 1047 } 1048 ''' % { "readOp" : readOpCode, 1049 "declDest" : declDest, 1050 "readDest" : readDestCode, 1051 "op" : op, 1052 "writeDest" : writeDestCode } 1053 for reg in range(rCount): 1054 eWalkCode += ''' 1055 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]); 1056 ''' % { "reg" : reg } 1057 iop = InstObjParams(name, Name, 1058 "RegRegImmOp", 1059 { "code": eWalkCode, 1060 "r_count": rCount, 1061 "predicate_test": predicateTest, 1062 "op_class": opClass }, []) 1063 header_output += NeonRegRegImmOpDeclare.subst(iop) 1064 exec_output += NeonEqualRegExecute.subst(iop) 1065 for type in types: 1066 substDict = { "targs" : type, 1067 "class_name" : Name } 1068 exec_output += NeonExecDeclare.subst(substDict) 1069 1070 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False): 1071 global header_output, exec_output 1072 eWalkCode = simdEnabledCheckCode + ''' 1073 BigRegVect srcReg1; 1074 RegVect destReg; 1075 ''' 1076 for reg in range(4): 1077 eWalkCode += ''' 1078 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1079 ''' % { "reg" : reg } 1080 if readDest: 1081 for reg in range(2): 1082 eWalkCode += ''' 1083 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1084 ''' % { "reg" : reg } 1085 readDestCode = '' 1086 if readDest: 1087 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1088 eWalkCode += ''' 1089 for (unsigned i = 0; i < eCount; i++) { 1090 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1091 Element destElem; 1092 %(readDest)s 1093 %(op)s 1094 destReg.elements[i] = htog(destElem); 1095 } 1096 ''' % { "op" : op, "readDest" : readDestCode } 1097 for reg in range(2): 1098 eWalkCode += ''' 1099 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1100 ''' % { "reg" : reg } 1101 iop = 
InstObjParams(name, Name, 1102 "RegRegImmOp", 1103 { "code": eWalkCode, 1104 "r_count": 2, 1105 "predicate_test": predicateTest, 1106 "op_class": opClass }, []) 1107 header_output += NeonRegRegImmOpDeclare.subst(iop) 1108 exec_output += NeonUnequalRegExecute.subst(iop) 1109 for type in types: 1110 substDict = { "targs" : type, 1111 "class_name" : Name } 1112 exec_output += NeonExecDeclare.subst(substDict) 1113 1114 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False): 1115 global header_output, exec_output 1116 eWalkCode = simdEnabledCheckCode + ''' 1117 RegVect srcReg1; 1118 BigRegVect destReg; 1119 ''' 1120 for reg in range(2): 1121 eWalkCode += ''' 1122 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1123 ''' % { "reg" : reg } 1124 if readDest: 1125 for reg in range(4): 1126 eWalkCode += ''' 1127 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1128 ''' % { "reg" : reg } 1129 readDestCode = '' 1130 if readDest: 1131 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1132 eWalkCode += ''' 1133 for (unsigned i = 0; i < eCount; i++) { 1134 Element srcElem1 = gtoh(srcReg1.elements[i]); 1135 BigElement destElem; 1136 %(readDest)s 1137 %(op)s 1138 destReg.elements[i] = htog(destElem); 1139 } 1140 ''' % { "op" : op, "readDest" : readDestCode } 1141 for reg in range(4): 1142 eWalkCode += ''' 1143 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1144 ''' % { "reg" : reg } 1145 iop = InstObjParams(name, Name, 1146 "RegRegImmOp", 1147 { "code": eWalkCode, 1148 "r_count": 2, 1149 "predicate_test": predicateTest, 1150 "op_class": opClass }, []) 1151 header_output += NeonRegRegImmOpDeclare.subst(iop) 1152 exec_output += NeonUnequalRegExecute.subst(iop) 1153 for type in types: 1154 substDict = { "targs" : type, 1155 "class_name" : Name } 1156 exec_output += NeonExecDeclare.subst(substDict) 1157 1158 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False): 1159 global header_output, exec_output 1160 eWalkCode = simdEnabledCheckCode + ''' 
1161 RegVect srcReg1, destReg; 1162 ''' 1163 for reg in range(rCount): 1164 eWalkCode += ''' 1165 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1166 ''' % { "reg" : reg } 1167 if readDest: 1168 eWalkCode += ''' 1169 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1170 ''' % { "reg" : reg } 1171 readDestCode = '' 1172 if readDest: 1173 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1174 eWalkCode += ''' 1175 for (unsigned i = 0; i < eCount; i++) { 1176 unsigned j = i; 1177 Element srcElem1 = gtoh(srcReg1.elements[i]); 1178 Element destElem; 1179 %(readDest)s 1180 %(op)s 1181 destReg.elements[j] = htog(destElem); 1182 } 1183 ''' % { "op" : op, "readDest" : readDestCode } 1184 for reg in range(rCount): 1185 eWalkCode += ''' 1186 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1187 ''' % { "reg" : reg } 1188 iop = InstObjParams(name, Name, 1189 "RegRegOp", 1190 { "code": eWalkCode, 1191 "r_count": rCount, 1192 "predicate_test": predicateTest, 1193 "op_class": opClass }, []) 1194 header_output += NeonRegRegOpDeclare.subst(iop) 1195 exec_output += NeonEqualRegExecute.subst(iop) 1196 for type in types: 1197 substDict = { "targs" : type, 1198 "class_name" : Name } 1199 exec_output += NeonExecDeclare.subst(substDict) 1200 1201 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False): 1202 global header_output, exec_output 1203 eWalkCode = simdEnabledCheckCode + ''' 1204 RegVect srcReg1, destReg; 1205 ''' 1206 for reg in range(rCount): 1207 eWalkCode += ''' 1208 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1209 ''' % { "reg" : reg } 1210 if readDest: 1211 eWalkCode += ''' 1212 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1213 ''' % { "reg" : reg } 1214 readDestCode = '' 1215 if readDest: 1216 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1217 eWalkCode += ''' 1218 for (unsigned i = 0; i < eCount; i++) { 1219 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1220 Element destElem; 1221 %(readDest)s 1222 %(op)s 1223 
destReg.elements[i] = htog(destElem); 1224 } 1225 ''' % { "op" : op, "readDest" : readDestCode } 1226 for reg in range(rCount): 1227 eWalkCode += ''' 1228 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1229 ''' % { "reg" : reg } 1230 iop = InstObjParams(name, Name, 1231 "RegRegImmOp", 1232 { "code": eWalkCode, 1233 "r_count": rCount, 1234 "predicate_test": predicateTest, 1235 "op_class": opClass }, []) 1236 header_output += NeonRegRegImmOpDeclare.subst(iop) 1237 exec_output += NeonEqualRegExecute.subst(iop) 1238 for type in types: 1239 substDict = { "targs" : type, 1240 "class_name" : Name } 1241 exec_output += NeonExecDeclare.subst(substDict) 1242 1243 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False): 1244 global header_output, exec_output 1245 eWalkCode = simdEnabledCheckCode + ''' 1246 RegVect srcReg1, destReg; 1247 ''' 1248 for reg in range(rCount): 1249 eWalkCode += ''' 1250 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1251 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1252 ''' % { "reg" : reg } 1253 if readDest: 1254 eWalkCode += ''' 1255 ''' % { "reg" : reg } 1256 readDestCode = '' 1257 if readDest: 1258 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1259 eWalkCode += op 1260 for reg in range(rCount): 1261 eWalkCode += ''' 1262 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1263 FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]); 1264 ''' % { "reg" : reg } 1265 iop = InstObjParams(name, Name, 1266 "RegRegOp", 1267 { "code": eWalkCode, 1268 "r_count": rCount, 1269 "predicate_test": predicateTest, 1270 "op_class": opClass }, []) 1271 header_output += NeonRegRegOpDeclare.subst(iop) 1272 exec_output += NeonEqualRegExecute.subst(iop) 1273 for type in types: 1274 substDict = { "targs" : type, 1275 "class_name" : Name } 1276 exec_output += NeonExecDeclare.subst(substDict) 1277 1278 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op, 1279 readDest=False, toInt=False): 1280 global header_output, exec_output 1281 
eWalkCode = simdEnabledCheckCode + ''' 1282 typedef FloatReg FloatVect[rCount]; 1283 FloatVect srcRegs1; 1284 ''' 1285 if toInt: 1286 eWalkCode += 'RegVect destRegs;\n' 1287 else: 1288 eWalkCode += 'FloatVect destRegs;\n' 1289 for reg in range(rCount): 1290 eWalkCode += ''' 1291 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1292 ''' % { "reg" : reg } 1293 if readDest: 1294 if toInt: 1295 eWalkCode += ''' 1296 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1297 ''' % { "reg" : reg } 1298 else: 1299 eWalkCode += ''' 1300 destRegs[%(reg)d] = FpDestP%(reg)d; 1301 ''' % { "reg" : reg } 1302 readDestCode = '' 1303 if readDest: 1304 readDestCode = 'destReg = destRegs[i];' 1305 destType = 'FloatReg' 1306 writeDest = 'destRegs[r] = destReg;' 1307 if toInt: 1308 destType = 'FloatRegBits' 1309 writeDest = 'destRegs.regs[r] = destReg;' 1310 eWalkCode += ''' 1311 for (unsigned r = 0; r < rCount; r++) { 1312 FloatReg srcReg1 = srcRegs1[r]; 1313 %(destType)s destReg; 1314 %(readDest)s 1315 %(op)s 1316 %(writeDest)s 1317 } 1318 ''' % { "op" : op, 1319 "readDest" : readDestCode, 1320 "destType" : destType, 1321 "writeDest" : writeDest } 1322 for reg in range(rCount): 1323 if toInt: 1324 eWalkCode += ''' 1325 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 1326 ''' % { "reg" : reg } 1327 else: 1328 eWalkCode += ''' 1329 FpDestP%(reg)d = destRegs[%(reg)d]; 1330 ''' % { "reg" : reg } 1331 iop = InstObjParams(name, Name, 1332 "FpRegRegOp", 1333 { "code": eWalkCode, 1334 "r_count": rCount, 1335 "predicate_test": predicateTest, 1336 "op_class": opClass }, []) 1337 header_output += NeonRegRegOpDeclare.subst(iop) 1338 exec_output += NeonEqualRegExecute.subst(iop) 1339 for type in types: 1340 substDict = { "targs" : type, 1341 "class_name" : Name } 1342 exec_output += NeonExecDeclare.subst(substDict) 1343 1344 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False): 1345 global header_output, exec_output 1346 eWalkCode = simdEnabledCheckCode + ''' 1347 RegVect srcRegs; 1348 
BigRegVect destReg; 1349 ''' 1350 for reg in range(rCount): 1351 eWalkCode += ''' 1352 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1353 ''' % { "reg" : reg } 1354 if readDest: 1355 eWalkCode += ''' 1356 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1357 ''' % { "reg" : reg } 1358 readDestCode = '' 1359 if readDest: 1360 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1361 eWalkCode += ''' 1362 for (unsigned i = 0; i < eCount / 2; i++) { 1363 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1364 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1365 BigElement destElem; 1366 %(readDest)s 1367 %(op)s 1368 destReg.elements[i] = htog(destElem); 1369 } 1370 ''' % { "op" : op, "readDest" : readDestCode } 1371 for reg in range(rCount): 1372 eWalkCode += ''' 1373 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1374 ''' % { "reg" : reg } 1375 iop = InstObjParams(name, Name, 1376 "RegRegOp", 1377 { "code": eWalkCode, 1378 "r_count": rCount, 1379 "predicate_test": predicateTest, 1380 "op_class": opClass }, []) 1381 header_output += NeonRegRegOpDeclare.subst(iop) 1382 exec_output += NeonUnequalRegExecute.subst(iop) 1383 for type in types: 1384 substDict = { "targs" : type, 1385 "class_name" : Name } 1386 exec_output += NeonExecDeclare.subst(substDict) 1387 1388 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False): 1389 global header_output, exec_output 1390 eWalkCode = simdEnabledCheckCode + ''' 1391 BigRegVect srcReg1; 1392 RegVect destReg; 1393 ''' 1394 for reg in range(4): 1395 eWalkCode += ''' 1396 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1397 ''' % { "reg" : reg } 1398 if readDest: 1399 for reg in range(2): 1400 eWalkCode += ''' 1401 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1402 ''' % { "reg" : reg } 1403 readDestCode = '' 1404 if readDest: 1405 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1406 eWalkCode += ''' 1407 for (unsigned i = 0; i < eCount; i++) { 1408 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 
1409 Element destElem; 1410 %(readDest)s 1411 %(op)s 1412 destReg.elements[i] = htog(destElem); 1413 } 1414 ''' % { "op" : op, "readDest" : readDestCode } 1415 for reg in range(2): 1416 eWalkCode += ''' 1417 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1418 ''' % { "reg" : reg } 1419 iop = InstObjParams(name, Name, 1420 "RegRegOp", 1421 { "code": eWalkCode, 1422 "r_count": 2, 1423 "predicate_test": predicateTest, 1424 "op_class": opClass }, []) 1425 header_output += NeonRegRegOpDeclare.subst(iop) 1426 exec_output += NeonUnequalRegExecute.subst(iop) 1427 for type in types: 1428 substDict = { "targs" : type, 1429 "class_name" : Name } 1430 exec_output += NeonExecDeclare.subst(substDict) 1431 1432 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False): 1433 global header_output, exec_output 1434 eWalkCode = simdEnabledCheckCode + ''' 1435 RegVect destReg; 1436 ''' 1437 if readDest: 1438 for reg in range(rCount): 1439 eWalkCode += ''' 1440 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1441 ''' % { "reg" : reg } 1442 readDestCode = '' 1443 if readDest: 1444 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1445 eWalkCode += ''' 1446 for (unsigned i = 0; i < eCount; i++) { 1447 Element destElem; 1448 %(readDest)s 1449 %(op)s 1450 destReg.elements[i] = htog(destElem); 1451 } 1452 ''' % { "op" : op, "readDest" : readDestCode } 1453 for reg in range(rCount): 1454 eWalkCode += ''' 1455 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1456 ''' % { "reg" : reg } 1457 iop = InstObjParams(name, Name, 1458 "RegImmOp", 1459 { "code": eWalkCode, 1460 "r_count": rCount, 1461 "predicate_test": predicateTest, 1462 "op_class": opClass }, []) 1463 header_output += NeonRegImmOpDeclare.subst(iop) 1464 exec_output += NeonEqualRegExecute.subst(iop) 1465 for type in types: 1466 substDict = { "targs" : type, 1467 "class_name" : Name } 1468 exec_output += NeonExecDeclare.subst(substDict) 1469 1470 def twoRegLongMiscInst(name, Name, opClass, types, op, 
readDest=False): 1471 global header_output, exec_output 1472 eWalkCode = simdEnabledCheckCode + ''' 1473 RegVect srcReg1; 1474 BigRegVect destReg; 1475 ''' 1476 for reg in range(2): 1477 eWalkCode += ''' 1478 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1479 ''' % { "reg" : reg } 1480 if readDest: 1481 for reg in range(4): 1482 eWalkCode += ''' 1483 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1484 ''' % { "reg" : reg } 1485 readDestCode = '' 1486 if readDest: 1487 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1488 eWalkCode += ''' 1489 for (unsigned i = 0; i < eCount; i++) { 1490 Element srcElem1 = gtoh(srcReg1.elements[i]); 1491 BigElement destElem; 1492 %(readDest)s 1493 %(op)s 1494 destReg.elements[i] = htog(destElem); 1495 } 1496 ''' % { "op" : op, "readDest" : readDestCode } 1497 for reg in range(4): 1498 eWalkCode += ''' 1499 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1500 ''' % { "reg" : reg } 1501 iop = InstObjParams(name, Name, 1502 "RegRegOp", 1503 { "code": eWalkCode, 1504 "r_count": 2, 1505 "predicate_test": predicateTest, 1506 "op_class": opClass }, []) 1507 header_output += NeonRegRegOpDeclare.subst(iop) 1508 exec_output += NeonUnequalRegExecute.subst(iop) 1509 for type in types: 1510 substDict = { "targs" : type, 1511 "class_name" : Name } 1512 exec_output += NeonExecDeclare.subst(substDict) 1513 1514 vhaddCode = ''' 1515 Element carryBit = 1516 (((unsigned)srcElem1 & 0x1) + 1517 ((unsigned)srcElem2 & 0x1)) >> 1; 1518 // Use division instead of a shift to ensure the sign extension works 1519 // right. The compiler will figure out if it can be a shift. Mask the 1520 // inputs so they get truncated correctly. 
destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    # Each snippet is instantiated twice: "D" over 2 registers and "Q" over 4.
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # vrhadd: same as vhadd, but 1 is added to the low-bit sum before its
    # carry is taken.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # vhsub: subtract the halved sources, then subtract the borrow derived
    # from the two low bits.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)

    # Element-wise bitwise operations.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov reuses the OR snippet under a different mnemonic and op class.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # Bit selects: these read the old destination, so the trailing True
    # (readDest) is passed.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit",
                      "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)

    # Element-wise maximum / minimum.
    vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

    vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)

    # Plain addition; vpadd reuses the same snippet with pairwise=True.
    vaddCode = '''
        destElem = srcElem1 + srcElem2;
    '''
    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                      2, vaddCode, pairwise=True)
    # Addition with both operands converted to BigElement first; shared by
    # vaddl and vaddw.
    vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # vaddhn: keep the bits of the sum above the low sizeof(Element)*8 bits.
    vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # vraddhn: as vaddhn, with a rounding constant added before the shift.
    vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

    # Plain subtraction.
    vsubCode = '''
destElem = srcElem1 - srcElem2;
    '''
    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # Subtraction with both operands converted to BigElement first; shared by
    # vsubl and vsubw.
    vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    '''
    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

    # Unsigned vqadd: wrap-around is detected when the sum is smaller than an
    # operand; the result is then set to all ones and fpscr.qc is set.
    vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # vsubhn: keep the bits of the difference above the low
    # sizeof(Element)*8 bits.
    vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # vrsubhn: as vsubhn, with a rounding constant added before the shift.
    vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)

    # Signed vqadd: overflow is flagged when both operands share a sign and
    # the sum's sign differs; the result is then replaced by a saturation
    # value and fpscr.qc is set.
    vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

    # Unsigned vqsub: a difference larger than the minuend means it wrapped;
    # the result is then set to 0 and fpscr.qc is set.
    vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr =
            (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

    # Signed vqsub: same overflow test as signed vqadd, with the second
    # operand's sign inverted (posSrc2).
    vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)

    # Comparisons: the result is all ones when the test holds, otherwise 0.
    vcgtCode = '''
        destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

    vcgeCode = '''
        destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

    vceqCode = '''
        destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)

    # vshl: the shift amount is the low byte of srcElem2 read as signed; a
    # negative amount shifts right, otherwise left.
    vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    '''
    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

    # vrshl: like vshl, but a right shift adds a rounding bit (rBit) taken
    # from the last bit shifted out.
    vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
    '''
    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)

    # Unsigned vqshl: a left shift that would drop set bits instead yields
    # the all-ones mask and sets fpscr.qc.
    vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem =
                        0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)

    # Signed vqshl: a left shift saturates (setting fpscr.qc) when the bits
    # shifted out, together with the new top bit, are not all copies of the
    # sign.
    vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ?
                         mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)

    # Unsigned vqrshl: rounding right shift, saturating left shift.
    vqrshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            destElem += rBit;
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)

    # Signed vqrshl: rounding, sign-extending right shift; saturating left
    # shift.
    vqrshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 -
                           1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)

    # vaba / vabal: add the absolute difference of the sources to the
    # existing destination (readDest=True).
    vabaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
    '''
    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
    vabalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

    # vabd / vabdl: absolute difference of the sources.
    vabdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
    '''
    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
    vabdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

    # vtst: all ones when the sources share any set bit, otherwise 0.
    vtstCode = '''
        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)

    # Integer multiply, and the variant with operands converted to
    # BigElement.
    vmulCode = '''
        destElem = srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
    vmullCode = '''
        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

    # Multiply-accumulate into the existing destination (readDest=True).
    vmlaCode = '''
        destElem = destElem + srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
    vmlalCode = '''
        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)

    # vqdmlal: the doubled product (midElem) is replaced by a saturation
    # value for the listed operand pairs, then accumulated; the accumulation
    # is checked for signed overflow too.  Either case sets fpscr.qc.
    vqdmlalCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        Element halfNeg = maxNeg / 2;
        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem += midElem;
        bool negDest = ltz(destElem);
        bool negMid = ltz(midElem);
        if (negPreDest == negMid && negMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 -
                            1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)

    # vqdmlsl: as vqdmlal, but the doubled product is subtracted from the
    # destination.
    vqdmlslCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        Element halfNeg = maxNeg / 2;
        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem -= midElem;
        bool negDest = ltz(destElem);
        bool posMid = ltz((BigElement)-midElem);
        if (negPreDest == posMid && posMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)

    # vqdmull: doubled product; when both operands equal the value with only
    # the top bit set, the result is replaced and fpscr.qc is set.
    vqdmullCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)((Element)1 <<
                    (Element)(sizeof(Element) * 8 - 1))) {
            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)

    # Multiply-subtract from the existing destination (readDest=True).
    vmlsCode = '''
        destElem = destElem - srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
    vmlslCode = '''
        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp",
                     smallTypes, vmlslCode, True)

    # Carry-less multiply: XOR together srcElem1 shifted left by each bit
    # position that is set in srcElem2.
    vmulpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= srcElem1 << j;
        }
    '''
    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
    vmullpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= (BigElement)srcElem1 << j;
        }
    '''
    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)

    # Pairwise max / min reuse the vmax / vmin snippets with pairwise=True.
    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)

    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)

    # vqdmulh: upper half of the doubled product; when both operands equal
    # the value with only the top bit set, the result becomes ~srcElem1 and
    # fpscr.qc is set.
    vqdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
                   (sizeof(Element) * 8);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)((Element)1 <<
                    (sizeof(Element) * 8 - 1))) {
            destElem = ~srcElem1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)

    # vqrdmulh: as vqdmulh, with a rounding constant added before the shift.
    # (This definition continues past the end of the visible chunk.)
    vqrdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        Element halfNeg = maxNeg / 2;
        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
            if (destElem < 0) {
                destElem = mask(sizeof(Element) * 8 - 1);
            } else {
                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            }
2145 fpscr.qc = 1; 2146 } 2147 FpscrQc = fpscr; 2148 ''' 2149 threeEqualRegInst("vqrdmulh", "VqrdmulhD", 2150 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) 2151 threeEqualRegInst("vqrdmulh", "VqrdmulhQ", 2152 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) 2153 2154 vmaxfpCode = ''' 2155 FPSCR fpscr = (FPSCR) FpscrExc; 2156 bool done; 2157 destReg = processNans(fpscr, done, true, srcReg1, srcReg2); 2158 if (!done) { 2159 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS, 2160 true, true, VfpRoundNearest); 2161 } else if (flushToZero(srcReg1, srcReg2)) { 2162 fpscr.idc = 1; 2163 } 2164 FpscrExc = fpscr; 2165 ''' 2166 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode) 2167 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode) 2168 2169 vminfpCode = ''' 2170 FPSCR fpscr = (FPSCR) FpscrExc; 2171 bool done; 2172 destReg = processNans(fpscr, done, true, srcReg1, srcReg2); 2173 if (!done) { 2174 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS, 2175 true, true, VfpRoundNearest); 2176 } else if (flushToZero(srcReg1, srcReg2)) { 2177 fpscr.idc = 1; 2178 } 2179 FpscrExc = fpscr; 2180 ''' 2181 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode) 2182 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode) 2183 2184 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",), 2185 2, vmaxfpCode, pairwise=True) 2186 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",), 2187 4, vmaxfpCode, pairwise=True) 2188 2189 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",), 2190 2, vminfpCode, pairwise=True) 2191 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",), 2192 4, vminfpCode, pairwise=True) 2193 2194 vaddfpCode = ''' 2195 FPSCR fpscr = (FPSCR) FpscrExc; 2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, 2197 true, true, VfpRoundNearest); 2198 FpscrExc = fpscr; 2199 ''' 2200 
# Single-precision NEON add.  vaddfpCode is the snippet assigned by the
# statement just before these calls.  The D- and Q-named classes are
# registered with the same arguments apart from the class name and the
# fifth argument (2 vs. 4).
# NOTE(review): the 2/4 argument is presumably the number of 32-bit registers
# the instruction spans (D = 2, Q = 4) -- confirm against threeEqualRegInstFp.
threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)

# Pairwise add reuses the same add snippet and only adds pairwise=True.
# NOTE(review): how pairwise=True changes operand selection is decided inside
# threeEqualRegInstFp, which is not visible here -- confirm there.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)

# Snippet for single-precision subtract: copy FpscrExc into a local FPSCR,
# hand srcReg1 and srcReg2 to binaryOp() with fpSubS and VfpRoundNearest,
# store the result in destReg, then write the local FPSCR back to FpscrExc.
# NOTE(review): the two `true` arguments are presumably binaryOp's
# flush-to-zero and default-NaN switches -- confirm against its declaration.
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)

# Snippet for single-precision multiply: same shape as the subtract snippet,
# with fpMulS as the operation passed to binaryOp().
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)

# Snippet for single-precision multiply-accumulate.  The work is done as two
# binaryOp() calls: the product of srcReg1 and srcReg2 is held in the float
# `mid`, and `mid` is then combined with the incoming destReg using fpAddS.
# Both calls share the same local fpscr, which is written back to FpscrExc
# once at the end.
# NOTE(review): two separate calls presumably means this is not a fused
# multiply-add -- confirm against binaryOp.
# NOTE(review): the trailing positional True on the registrations below is
# presumably the "destination is also read" flag, since the snippet reads
# destReg before assigning it -- confirm against threeEqualRegInstFp.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)

# Snippet for single-precision multiply-subtract.  Same two-step shape as
# vmlafpCode, but the second binaryOp() call uses fpSubS with destReg as the
# first operand and the product `mid` as the second.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) 2247 2248 vcgtfpCode = ''' 2249 FPSCR fpscr = (FPSCR) FpscrExc; 2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc, 2251 true, true, VfpRoundNearest); 2252 destReg = (res == 0) ? -1 : 0; 2253 if (res == 2.0) 2254 fpscr.ioc = 1; 2255 FpscrExc = fpscr; 2256 ''' 2257 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",), 2258 2, vcgtfpCode, toInt = True) 2259 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",), 2260 4, vcgtfpCode, toInt = True) 2261 2262 vcgefpCode = ''' 2263 FPSCR fpscr = (FPSCR) FpscrExc; 2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc, 2265 true, true, VfpRoundNearest); 2266 destReg = (res == 0) ? -1 : 0; 2267 if (res == 2.0) 2268 fpscr.ioc = 1; 2269 FpscrExc = fpscr; 2270 ''' 2271 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",), 2272 2, vcgefpCode, toInt = True) 2273 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",), 2274 4, vcgefpCode, toInt = True) 2275 2276 vacgtfpCode = ''' 2277 FPSCR fpscr = (FPSCR) FpscrExc; 2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc, 2279 true, true, VfpRoundNearest); 2280 destReg = (res == 0) ? -1 : 0; 2281 if (res == 2.0) 2282 fpscr.ioc = 1; 2283 FpscrExc = fpscr; 2284 ''' 2285 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",), 2286 2, vacgtfpCode, toInt = True) 2287 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",), 2288 4, vacgtfpCode, toInt = True) 2289 2290 vacgefpCode = ''' 2291 FPSCR fpscr = (FPSCR) FpscrExc; 2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc, 2293 true, true, VfpRoundNearest); 2294 destReg = (res == 0) ? 
-1 : 0; 2295 if (res == 2.0) 2296 fpscr.ioc = 1; 2297 FpscrExc = fpscr; 2298 ''' 2299 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",), 2300 2, vacgefpCode, toInt = True) 2301 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",), 2302 4, vacgefpCode, toInt = True) 2303 2304 vceqfpCode = ''' 2305 FPSCR fpscr = (FPSCR) FpscrExc; 2306 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc, 2307 true, true, VfpRoundNearest); 2308 destReg = (res == 0) ? -1 : 0; 2309 if (res == 2.0) 2310 fpscr.ioc = 1; 2311 FpscrExc = fpscr; 2312 ''' 2313 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",), 2314 2, vceqfpCode, toInt = True) 2315 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",), 2316 4, vceqfpCode, toInt = True) 2317 2318 vrecpsCode = ''' 2319 FPSCR fpscr = (FPSCR) FpscrExc; 2320 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, 2321 true, true, VfpRoundNearest); 2322 FpscrExc = fpscr; 2323 ''' 2324 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode) 2325 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode) 2326 2327 vrsqrtsCode = ''' 2328 FPSCR fpscr = (FPSCR) FpscrExc; 2329 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, 2330 true, true, VfpRoundNearest); 2331 FpscrExc = fpscr; 2332 ''' 2333 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode) 2334 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode) 2335 2336 vabdfpCode = ''' 2337 FPSCR fpscr = (FPSCR) FpscrExc; 2338 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, 2339 true, true, VfpRoundNearest); 2340 destReg = fabs(mid); 2341 FpscrExc = fpscr; 2342 ''' 2343 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode) 2344 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode) 2345 2346 twoEqualRegInst("vmla", "VmlasD", 
"SimdMultAccOp", unsignedTypes, 2, vmlaCode, True) 2347 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True) 2348 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) 2349 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) 2350 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True) 2351 2352 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True) 2353 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True) 2354 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True) 2355 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) 2356 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True) 2357 2358 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode) 2359 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode) 2360 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode) 2361 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode) 2362 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode) 2363 2364 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode) 2365 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True) 2366 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True) 2367 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) 2368 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) 2369 twoEqualRegInst("vqrdmulh", "VqrdmulhsD", 2370 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) 2371 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", 2372 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) 2373 2374 vshrCode = ''' 2375 if (imm >= 
sizeof(srcElem1) * 8) { 2376 if (ltz(srcElem1)) 2377 destElem = -1; 2378 else 2379 destElem = 0; 2380 } else { 2381 destElem = srcElem1 >> imm; 2382 } 2383 ''' 2384 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode) 2385 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode) 2386 2387 vsraCode = ''' 2388 Element mid;; 2389 if (imm >= sizeof(srcElem1) * 8) { 2390 mid = ltz(srcElem1) ? -1 : 0; 2391 } else { 2392 mid = srcElem1 >> imm; 2393 if (ltz(srcElem1) && !ltz(mid)) { 2394 mid |= -(mid & ((Element)1 << 2395 (sizeof(Element) * 8 - 1 - imm))); 2396 } 2397 } 2398 destElem += mid; 2399 ''' 2400 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True) 2401 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True) 2402 2403 vrshrCode = ''' 2404 if (imm > sizeof(srcElem1) * 8) { 2405 destElem = 0; 2406 } else if (imm) { 2407 Element rBit = bits(srcElem1, imm - 1); 2408 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2409 } else { 2410 destElem = srcElem1; 2411 } 2412 ''' 2413 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode) 2414 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode) 2415 2416 vrsraCode = ''' 2417 if (imm > sizeof(srcElem1) * 8) { 2418 destElem += 0; 2419 } else if (imm) { 2420 Element rBit = bits(srcElem1, imm - 1); 2421 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2422 } else { 2423 destElem += srcElem1; 2424 } 2425 ''' 2426 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True) 2427 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True) 2428 2429 vsriCode = ''' 2430 if (imm >= sizeof(Element) * 8) 2431 destElem = destElem; 2432 else 2433 destElem = (srcElem1 >> imm) | 2434 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2435 ''' 2436 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True) 2437 twoRegShiftInst("vsri", "NVsriQ", 
"SimdShiftOp", unsignedTypes, 4, vsriCode, True) 2438 2439 vshlCode = ''' 2440 if (imm >= sizeof(Element) * 8) 2441 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; 2442 else 2443 destElem = srcElem1 << imm; 2444 ''' 2445 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode) 2446 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode) 2447 2448 vsliCode = ''' 2449 if (imm >= sizeof(Element) * 8) 2450 destElem = destElem; 2451 else 2452 destElem = (srcElem1 << imm) | (destElem & mask(imm)); 2453 ''' 2454 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True) 2455 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True) 2456 2457 vqshlCode = ''' 2458 FPSCR fpscr = (FPSCR) FpscrQc; 2459 if (imm >= sizeof(Element) * 8) { 2460 if (srcElem1 != 0) { 2461 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2462 if (srcElem1 > 0) 2463 destElem = ~destElem; 2464 fpscr.qc = 1; 2465 } else { 2466 destElem = 0; 2467 } 2468 } else if (imm) { 2469 destElem = (srcElem1 << imm); 2470 uint64_t topBits = bits((uint64_t)srcElem1, 2471 sizeof(Element) * 8 - 1, 2472 sizeof(Element) * 8 - 1 - imm); 2473 if (topBits != 0 && topBits != mask(imm + 1)) { 2474 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2475 if (srcElem1 > 0) 2476 destElem = ~destElem; 2477 fpscr.qc = 1; 2478 } 2479 } else { 2480 destElem = srcElem1; 2481 } 2482 FpscrQc = fpscr; 2483 ''' 2484 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode) 2485 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode) 2486 2487 vqshluCode = ''' 2488 FPSCR fpscr = (FPSCR) FpscrQc; 2489 if (imm >= sizeof(Element) * 8) { 2490 if (srcElem1 != 0) { 2491 destElem = mask(sizeof(Element) * 8); 2492 fpscr.qc = 1; 2493 } else { 2494 destElem = 0; 2495 } 2496 } else if (imm) { 2497 destElem = (srcElem1 << imm); 2498 uint64_t topBits = bits((uint64_t)srcElem1, 2499 sizeof(Element) * 8 - 1, 
2500 sizeof(Element) * 8 - imm); 2501 if (topBits != 0) { 2502 destElem = mask(sizeof(Element) * 8); 2503 fpscr.qc = 1; 2504 } 2505 } else { 2506 destElem = srcElem1; 2507 } 2508 FpscrQc = fpscr; 2509 ''' 2510 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode) 2511 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode) 2512 2513 vqshlusCode = ''' 2514 FPSCR fpscr = (FPSCR) FpscrQc; 2515 if (imm >= sizeof(Element) * 8) { 2516 if (srcElem1 < 0) { 2517 destElem = 0; 2518 fpscr.qc = 1; 2519 } else if (srcElem1 > 0) { 2520 destElem = mask(sizeof(Element) * 8); 2521 fpscr.qc = 1; 2522 } else { 2523 destElem = 0; 2524 } 2525 } else if (imm) { 2526 destElem = (srcElem1 << imm); 2527 uint64_t topBits = bits((uint64_t)srcElem1, 2528 sizeof(Element) * 8 - 1, 2529 sizeof(Element) * 8 - imm); 2530 if (srcElem1 < 0) { 2531 destElem = 0; 2532 fpscr.qc = 1; 2533 } else if (topBits != 0) { 2534 destElem = mask(sizeof(Element) * 8); 2535 fpscr.qc = 1; 2536 } 2537 } else { 2538 if (srcElem1 < 0) { 2539 fpscr.qc = 1; 2540 destElem = 0; 2541 } else { 2542 destElem = srcElem1; 2543 } 2544 } 2545 FpscrQc = fpscr; 2546 ''' 2547 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode) 2548 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode) 2549 2550 vshrnCode = ''' 2551 if (imm >= sizeof(srcElem1) * 8) { 2552 destElem = 0; 2553 } else { 2554 destElem = srcElem1 >> imm; 2555 } 2556 ''' 2557 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode) 2558 2559 vrshrnCode = ''' 2560 if (imm > sizeof(srcElem1) * 8) { 2561 destElem = 0; 2562 } else if (imm) { 2563 Element rBit = bits(srcElem1, imm - 1); 2564 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2565 } else { 2566 destElem = srcElem1; 2567 } 2568 ''' 2569 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode) 2570 2571 vqshrnCode = ''' 2572 FPSCR fpscr = 
(FPSCR) FpscrQc; 2573 if (imm > sizeof(srcElem1) * 8) { 2574 if (srcElem1 != 0 && srcElem1 != -1) 2575 fpscr.qc = 1; 2576 destElem = 0; 2577 } else if (imm) { 2578 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2579 mid |= -(mid & ((BigElement)1 << 2580 (sizeof(BigElement) * 8 - 1 - imm))); 2581 if (mid != (Element)mid) { 2582 destElem = mask(sizeof(Element) * 8 - 1); 2583 if (srcElem1 < 0) 2584 destElem = ~destElem; 2585 fpscr.qc = 1; 2586 } else { 2587 destElem = mid; 2588 } 2589 } else { 2590 destElem = srcElem1; 2591 } 2592 FpscrQc = fpscr; 2593 ''' 2594 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode) 2595 2596 vqshrunCode = ''' 2597 FPSCR fpscr = (FPSCR) FpscrQc; 2598 if (imm > sizeof(srcElem1) * 8) { 2599 if (srcElem1 != 0) 2600 fpscr.qc = 1; 2601 destElem = 0; 2602 } else if (imm) { 2603 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2604 if (mid != (Element)mid) { 2605 destElem = mask(sizeof(Element) * 8); 2606 fpscr.qc = 1; 2607 } else { 2608 destElem = mid; 2609 } 2610 } else { 2611 destElem = srcElem1; 2612 } 2613 FpscrQc = fpscr; 2614 ''' 2615 twoRegNarrowShiftInst("vqshrun", "NVqshrun", 2616 "SimdShiftOp", smallUnsignedTypes, vqshrunCode) 2617 2618 vqshrunsCode = ''' 2619 FPSCR fpscr = (FPSCR) FpscrQc; 2620 if (imm > sizeof(srcElem1) * 8) { 2621 if (srcElem1 != 0) 2622 fpscr.qc = 1; 2623 destElem = 0; 2624 } else if (imm) { 2625 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2626 if (bits(mid, sizeof(BigElement) * 8 - 1, 2627 sizeof(Element) * 8) != 0) { 2628 if (srcElem1 < 0) { 2629 destElem = 0; 2630 } else { 2631 destElem = mask(sizeof(Element) * 8); 2632 } 2633 fpscr.qc = 1; 2634 } else { 2635 destElem = mid; 2636 } 2637 } else { 2638 destElem = srcElem1; 2639 } 2640 FpscrQc = fpscr; 2641 ''' 2642 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 2643 "SimdShiftOp", smallSignedTypes, vqshrunsCode) 2644 2645 vqrshrnCode = ''' 2646 FPSCR fpscr = (FPSCR) FpscrQc; 2647 if (imm > sizeof(srcElem1) * 8) { 2648 if 
(srcElem1 != 0 && srcElem1 != -1) 2649 fpscr.qc = 1; 2650 destElem = 0; 2651 } else if (imm) { 2652 BigElement mid = (srcElem1 >> (imm - 1)); 2653 uint64_t rBit = mid & 0x1; 2654 mid >>= 1; 2655 mid |= -(mid & ((BigElement)1 << 2656 (sizeof(BigElement) * 8 - 1 - imm))); 2657 mid += rBit; 2658 if (mid != (Element)mid) { 2659 destElem = mask(sizeof(Element) * 8 - 1); 2660 if (srcElem1 < 0) 2661 destElem = ~destElem; 2662 fpscr.qc = 1; 2663 } else { 2664 destElem = mid; 2665 } 2666 } else { 2667 if (srcElem1 != (Element)srcElem1) { 2668 destElem = mask(sizeof(Element) * 8 - 1); 2669 if (srcElem1 < 0) 2670 destElem = ~destElem; 2671 fpscr.qc = 1; 2672 } else { 2673 destElem = srcElem1; 2674 } 2675 } 2676 FpscrQc = fpscr; 2677 ''' 2678 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 2679 "SimdShiftOp", smallSignedTypes, vqrshrnCode) 2680 2681 vqrshrunCode = ''' 2682 FPSCR fpscr = (FPSCR) FpscrQc; 2683 if (imm > sizeof(srcElem1) * 8) { 2684 if (srcElem1 != 0) 2685 fpscr.qc = 1; 2686 destElem = 0; 2687 } else if (imm) { 2688 BigElement mid = (srcElem1 >> (imm - 1)); 2689 uint64_t rBit = mid & 0x1; 2690 mid >>= 1; 2691 mid += rBit; 2692 if (mid != (Element)mid) { 2693 destElem = mask(sizeof(Element) * 8); 2694 fpscr.qc = 1; 2695 } else { 2696 destElem = mid; 2697 } 2698 } else { 2699 if (srcElem1 != (Element)srcElem1) { 2700 destElem = mask(sizeof(Element) * 8 - 1); 2701 fpscr.qc = 1; 2702 } else { 2703 destElem = srcElem1; 2704 } 2705 } 2706 FpscrQc = fpscr; 2707 ''' 2708 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 2709 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) 2710 2711 vqrshrunsCode = ''' 2712 FPSCR fpscr = (FPSCR) FpscrQc; 2713 if (imm > sizeof(srcElem1) * 8) { 2714 if (srcElem1 != 0) 2715 fpscr.qc = 1; 2716 destElem = 0; 2717 } else if (imm) { 2718 BigElement mid = (srcElem1 >> (imm - 1)); 2719 uint64_t rBit = mid & 0x1; 2720 mid >>= 1; 2721 mid |= -(mid & ((BigElement)1 << 2722 (sizeof(BigElement) * 8 - 1 - imm))); 2723 mid += rBit; 2724 if (bits(mid, 
sizeof(BigElement) * 8 - 1, 2725 sizeof(Element) * 8) != 0) { 2726 if (srcElem1 < 0) { 2727 destElem = 0; 2728 } else { 2729 destElem = mask(sizeof(Element) * 8); 2730 } 2731 fpscr.qc = 1; 2732 } else { 2733 destElem = mid; 2734 } 2735 } else { 2736 if (srcElem1 < 0) { 2737 fpscr.qc = 1; 2738 destElem = 0; 2739 } else { 2740 destElem = srcElem1; 2741 } 2742 } 2743 FpscrQc = fpscr; 2744 ''' 2745 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 2746 "SimdShiftOp", smallSignedTypes, vqrshrunsCode) 2747 2748 vshllCode = ''' 2749 if (imm >= sizeof(destElem) * 8) { 2750 destElem = 0; 2751 } else { 2752 destElem = (BigElement)srcElem1 << imm; 2753 } 2754 ''' 2755 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode) 2756 2757 vmovlCode = ''' 2758 destElem = srcElem1; 2759 ''' 2760 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) 2761 2762 vcvt2ufxCode = ''' 2763 FPSCR fpscr = (FPSCR) FpscrExc; 2764 if (flushToZero(srcElem1)) 2765 fpscr.idc = 1; 2766 VfpSavedState state = prepFpState(VfpRoundNearest); 2767 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2768 destReg = vfpFpSToFixed(srcElem1, false, false, imm); 2769 __asm__ __volatile__("" :: "m" (destReg)); 2770 finishVfp(fpscr, state, true); 2771 FpscrExc = fpscr; 2772 ''' 2773 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 2774 2, vcvt2ufxCode, toInt = True) 2775 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",), 2776 4, vcvt2ufxCode, toInt = True) 2777 2778 vcvt2sfxCode = ''' 2779 FPSCR fpscr = (FPSCR) FpscrExc; 2780 if (flushToZero(srcElem1)) 2781 fpscr.idc = 1; 2782 VfpSavedState state = prepFpState(VfpRoundNearest); 2783 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2784 destReg = vfpFpSToFixed(srcElem1, true, false, imm); 2785 __asm__ __volatile__("" :: "m" (destReg)); 2786 finishVfp(fpscr, state, true); 2787 FpscrExc = fpscr; 2788 ''' 2789 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), 2790 2, 
vcvt2sfxCode, toInt = True) 2791 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",), 2792 4, vcvt2sfxCode, toInt = True) 2793 2794 vcvtu2fpCode = ''' 2795 FPSCR fpscr = (FPSCR) FpscrExc; 2796 VfpSavedState state = prepFpState(VfpRoundNearest); 2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2798 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); 2799 __asm__ __volatile__("" :: "m" (destElem)); 2800 finishVfp(fpscr, state, true); 2801 FpscrExc = fpscr; 2802 ''' 2803 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), 2804 2, vcvtu2fpCode, fromInt = True) 2805 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",), 2806 4, vcvtu2fpCode, fromInt = True) 2807 2808 vcvts2fpCode = ''' 2809 FPSCR fpscr = (FPSCR) FpscrExc; 2810 VfpSavedState state = prepFpState(VfpRoundNearest); 2811 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2812 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); 2813 __asm__ __volatile__("" :: "m" (destElem)); 2814 finishVfp(fpscr, state, true); 2815 FpscrExc = fpscr; 2816 ''' 2817 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), 2818 2, vcvts2fpCode, fromInt = True) 2819 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",), 2820 4, vcvts2fpCode, fromInt = True) 2821 2822 vcvts2hCode = ''' 2823 destElem = 0; 2824 FPSCR fpscr = (FPSCR) FpscrExc; 2825 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 2826 if (flushToZero(srcFp1)) 2827 fpscr.idc = 1; 2828 VfpSavedState state = prepFpState(VfpRoundNearest); 2829 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 2830 : "m" (srcFp1), "m" (destElem)); 2831 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 2832 fpscr.ahp, srcFp1); 2833 __asm__ __volatile__("" :: "m" (destElem)); 2834 finishVfp(fpscr, state, true); 2835 FpscrExc = fpscr; 2836 ''' 2837 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) 2838 2839 vcvth2sCode = ''' 2840 destElem = 0; 2841 FPSCR fpscr = 
(FPSCR) FpscrExc; 2842 VfpSavedState state = prepFpState(VfpRoundNearest); 2843 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 2844 : "m" (srcElem1), "m" (destElem)); 2845 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 2846 __asm__ __volatile__("" :: "m" (destElem)); 2847 finishVfp(fpscr, state, true); 2848 FpscrExc = fpscr; 2849 ''' 2850 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) 2851 2852 vrsqrteCode = ''' 2853 destElem = unsignedRSqrtEstimate(srcElem1); 2854 ''' 2855 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode) 2856 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) 2857 2858 vrsqrtefpCode = ''' 2859 FPSCR fpscr = (FPSCR) FpscrExc; 2860 if (flushToZero(srcReg1)) 2861 fpscr.idc = 1; 2862 destReg = fprSqrtEstimate(fpscr, srcReg1); 2863 FpscrExc = fpscr; 2864 ''' 2865 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) 2866 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) 2867 2868 vrecpeCode = ''' 2869 destElem = unsignedRecipEstimate(srcElem1); 2870 ''' 2871 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode) 2872 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) 2873 2874 vrecpefpCode = ''' 2875 FPSCR fpscr = (FPSCR) FpscrExc; 2876 if (flushToZero(srcReg1)) 2877 fpscr.idc = 1; 2878 destReg = fpRecipEstimate(fpscr, srcReg1); 2879 FpscrExc = fpscr; 2880 ''' 2881 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) 2882 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) 2883 2884 vrev16Code = ''' 2885 destElem = srcElem1; 2886 unsigned groupSize = ((1 << 1) / sizeof(Element)); 2887 unsigned reverseMask = (groupSize - 1); 2888 j = i ^ reverseMask; 2889 ''' 2890 twoRegMiscInst("vrev16", "NVrev16D", 
"SimdAluOp", ("uint8_t",), 2, vrev16Code) 2891 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code) 2892 vrev32Code = ''' 2893 destElem = srcElem1; 2894 unsigned groupSize = ((1 << 2) / sizeof(Element)); 2895 unsigned reverseMask = (groupSize - 1); 2896 j = i ^ reverseMask; 2897 ''' 2898 twoRegMiscInst("vrev32", "NVrev32D", 2899 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code) 2900 twoRegMiscInst("vrev32", "NVrev32Q", 2901 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code) 2902 vrev64Code = ''' 2903 destElem = srcElem1; 2904 unsigned groupSize = ((1 << 3) / sizeof(Element)); 2905 unsigned reverseMask = (groupSize - 1); 2906 j = i ^ reverseMask; 2907 ''' 2908 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code) 2909 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code) 2910 2911 vpaddlCode = ''' 2912 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2913 ''' 2914 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode) 2915 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode) 2916 2917 vpadalCode = ''' 2918 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 2919 ''' 2920 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True) 2921 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True) 2922 2923 vclsCode = ''' 2924 unsigned count = 0; 2925 if (srcElem1 < 0) { 2926 srcElem1 <<= 1; 2927 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 2928 count++; 2929 srcElem1 <<= 1; 2930 } 2931 } else { 2932 srcElem1 <<= 1; 2933 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 2934 count++; 2935 srcElem1 <<= 1; 2936 } 2937 } 2938 destElem = count; 2939 ''' 2940 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode) 2941 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode) 2942 2943 vclzCode = ''' 2944 
unsigned count = 0; 2945 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 2946 count++; 2947 srcElem1 <<= 1; 2948 } 2949 destElem = count; 2950 ''' 2951 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode) 2952 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode) 2953 2954 vcntCode = ''' 2955 unsigned count = 0; 2956 while (srcElem1 && count < sizeof(Element) * 8) { 2957 count += srcElem1 & 0x1; 2958 srcElem1 >>= 1; 2959 } 2960 destElem = count; 2961 ''' 2962 2963 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode) 2964 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode) 2965 2966 vmvnCode = ''' 2967 destElem = ~srcElem1; 2968 ''' 2969 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 2970 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 2971 2972 vqabsCode = ''' 2973 FPSCR fpscr = (FPSCR) FpscrQc; 2974 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2975 fpscr.qc = 1; 2976 destElem = ~srcElem1; 2977 } else if (srcElem1 < 0) { 2978 destElem = -srcElem1; 2979 } else { 2980 destElem = srcElem1; 2981 } 2982 FpscrQc = fpscr; 2983 ''' 2984 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) 2985 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) 2986 2987 vqnegCode = ''' 2988 FPSCR fpscr = (FPSCR) FpscrQc; 2989 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2990 fpscr.qc = 1; 2991 destElem = ~srcElem1; 2992 } else { 2993 destElem = -srcElem1; 2994 } 2995 FpscrQc = fpscr; 2996 ''' 2997 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) 2998 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) 2999 3000 vabsCode = ''' 3001 if (srcElem1 < 0) { 3002 destElem = -srcElem1; 3003 } else { 3004 destElem = srcElem1; 3005 } 3006 ''' 3007 3008 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, 
vabsCode) 3009 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode) 3010 vabsfpCode = ''' 3011 union 3012 { 3013 uint32_t i; 3014 float f; 3015 } cStruct; 3016 cStruct.f = srcReg1; 3017 cStruct.i &= mask(sizeof(Element) * 8 - 1); 3018 destReg = cStruct.f; 3019 ''' 3020 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode) 3021 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode) 3022 3023 vnegCode = ''' 3024 destElem = -srcElem1; 3025 ''' 3026 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode) 3027 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode) 3028 vnegfpCode = ''' 3029 destReg = -srcReg1; 3030 ''' 3031 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode) 3032 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode) 3033 3034 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3035 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) 3036 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) 3037 vcgtfpCode = ''' 3038 FPSCR fpscr = (FPSCR) FpscrExc; 3039 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, 3040 true, true, VfpRoundNearest); 3041 destReg = (res == 0) ? -1 : 0; 3042 if (res == 2.0) 3043 fpscr.ioc = 1; 3044 FpscrExc = fpscr; 3045 ''' 3046 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), 3047 2, vcgtfpCode, toInt = True) 3048 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",), 3049 4, vcgtfpCode, toInt = True) 3050 3051 vcgeCode = 'destElem = (srcElem1 >= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3052 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) 3053 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) 3054 vcgefpCode = ''' 3055 FPSCR fpscr = (FPSCR) FpscrExc; 3056 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, 3057 true, true, VfpRoundNearest); 3058 destReg = (res == 0) ? -1 : 0; 3059 if (res == 2.0) 3060 fpscr.ioc = 1; 3061 FpscrExc = fpscr; 3062 ''' 3063 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), 3064 2, vcgefpCode, toInt = True) 3065 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",), 3066 4, vcgefpCode, toInt = True) 3067 3068 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3069 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) 3070 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) 3071 vceqfpCode = ''' 3072 FPSCR fpscr = (FPSCR) FpscrExc; 3073 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, 3074 true, true, VfpRoundNearest); 3075 destReg = (res == 0) ? -1 : 0; 3076 if (res == 2.0) 3077 fpscr.ioc = 1; 3078 FpscrExc = fpscr; 3079 ''' 3080 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), 3081 2, vceqfpCode, toInt = True) 3082 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",), 3083 4, vceqfpCode, toInt = True) 3084 3085 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;' 3086 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) 3087 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) 3088 vclefpCode = ''' 3089 FPSCR fpscr = (FPSCR) FpscrExc; 3090 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, 3091 true, true, VfpRoundNearest); 3092 destReg = (res == 0) ? 
-1 : 0; 3093 if (res == 2.0) 3094 fpscr.ioc = 1; 3095 FpscrExc = fpscr; 3096 ''' 3097 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), 3098 2, vclefpCode, toInt = True) 3099 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",), 3100 4, vclefpCode, toInt = True) 3101 3102 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;' 3103 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) 3104 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) 3105 vcltfpCode = ''' 3106 FPSCR fpscr = (FPSCR) FpscrExc; 3107 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, 3108 true, true, VfpRoundNearest); 3109 destReg = (res == 0) ? -1 : 0; 3110 if (res == 2.0) 3111 fpscr.ioc = 1; 3112 FpscrExc = fpscr; 3113 ''' 3114 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 3115 2, vcltfpCode, toInt = True) 3116 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",), 3117 4, vcltfpCode, toInt = True) 3118 3119 vswpCode = ''' 3120 FloatRegBits mid; 3121 for (unsigned r = 0; r < rCount; r++) { 3122 mid = srcReg1.regs[r]; 3123 srcReg1.regs[r] = destReg.regs[r]; 3124 destReg.regs[r] = mid; 3125 } 3126 ''' 3127 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode) 3128 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode) 3129 3130 vtrnCode = ''' 3131 Element mid; 3132 for (unsigned i = 0; i < eCount; i += 2) { 3133 mid = srcReg1.elements[i]; 3134 srcReg1.elements[i] = destReg.elements[i + 1]; 3135 destReg.elements[i + 1] = mid; 3136 } 3137 ''' 3138 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", 3139 smallUnsignedTypes, 2, vtrnCode) 3140 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", 3141 smallUnsignedTypes, 4, vtrnCode) 3142 3143 vuzpCode = ''' 3144 Element mid[eCount]; 3145 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3146 for (unsigned i = 0; i < eCount / 2; i++) { 3147 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 
3148 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3149 destReg.elements[i] = destReg.elements[2 * i]; 3150 } 3151 for (unsigned i = 0; i < eCount / 2; i++) { 3152 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3153 } 3154 ''' 3155 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode) 3156 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode) 3157 3158 vzipCode = ''' 3159 Element mid[eCount]; 3160 memcpy(&mid, &destReg, sizeof(destReg)); 3161 for (unsigned i = 0; i < eCount / 2; i++) { 3162 destReg.elements[2 * i] = mid[i]; 3163 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3164 } 3165 for (int i = 0; i < eCount / 2; i++) { 3166 srcReg1.elements[2 * i] = mid[eCount / 2 + i]; 3167 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3168 } 3169 ''' 3170 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode) 3171 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode) 3172 3173 vmovnCode = 'destElem = srcElem1;' 3174 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode) 3175 3176 vdupCode = 'destElem = srcElem1;' 3177 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode) 3178 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode) 3179 3180 def vdupGprInst(name, Name, opClass, types, rCount): 3181 global header_output, exec_output 3182 eWalkCode = ''' 3183 RegVect destReg; 3184 for (unsigned i = 0; i < eCount; i++) { 3185 destReg.elements[i] = htog((Element)Op1); 3186 } 3187 ''' 3188 for reg in range(rCount): 3189 eWalkCode += ''' 3190 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3191 ''' % { "reg" : reg } 3192 iop = InstObjParams(name, Name, 3193 "RegRegOp", 3194 { "code": eWalkCode, 3195 "r_count": rCount, 3196 "predicate_test": predicateTest, 3197 "op_class": opClass }, []) 3198 header_output += NeonRegRegOpDeclare.subst(iop) 3199 exec_output += 
NeonEqualRegExecute.subst(iop) 3200 for type in types: 3201 substDict = { "targs" : type, 3202 "class_name" : Name } 3203 exec_output += NeonExecDeclare.subst(substDict) 3204 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2) 3205 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4) 3206 3207 vmovCode = 'destElem = imm;' 3208 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode) 3209 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode) 3210 3211 vorrCode = 'destElem |= imm;' 3212 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True) 3213 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True) 3214 3215 vmvnCode = 'destElem = ~imm;' 3216 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 3217 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 3218 3219 vbicCode = 'destElem &= ~imm;' 3220 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True) 3221 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) 3222 3223 vqmovnCode = ''' 3224 FPSCR fpscr = (FPSCR) FpscrQc; 3225 destElem = srcElem1; 3226 if ((BigElement)destElem != srcElem1) { 3227 fpscr.qc = 1; 3228 destElem = mask(sizeof(Element) * 8 - 1); 3229 if (srcElem1 < 0) 3230 destElem = ~destElem; 3231 } 3232 FpscrQc = fpscr; 3233 ''' 3234 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) 3235 3236 vqmovunCode = ''' 3237 FPSCR fpscr = (FPSCR) FpscrQc; 3238 destElem = srcElem1; 3239 if ((BigElement)destElem != srcElem1) { 3240 fpscr.qc = 1; 3241 destElem = mask(sizeof(Element) * 8); 3242 } 3243 FpscrQc = fpscr; 3244 ''' 3245 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3246 "SimdMiscOp", smallUnsignedTypes, vqmovunCode) 3247 3248 vqmovunsCode = ''' 3249 FPSCR fpscr = (FPSCR) FpscrQc; 3250 destElem = srcElem1; 3251 if (srcElem1 < 0 || 3252 ((BigElement)destElem & 
mask(sizeof(Element) * 8)) != srcElem1) { 3253 fpscr.qc = 1; 3254 destElem = mask(sizeof(Element) * 8); 3255 if (srcElem1 < 0) 3256 destElem = ~destElem; 3257 } 3258 FpscrQc = fpscr; 3259 ''' 3260 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3261 "SimdMiscOp", smallSignedTypes, vqmovunsCode) 3262 3263 def buildVext(name, Name, opClass, types, rCount, op): 3264 global header_output, exec_output 3265 eWalkCode = ''' 3266 RegVect srcReg1, srcReg2, destReg; 3267 ''' 3268 for reg in range(rCount): 3269 eWalkCode += simdEnabledCheckCode + ''' 3270 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 3271 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 3272 ''' % { "reg" : reg } 3273 eWalkCode += op 3274 for reg in range(rCount): 3275 eWalkCode += ''' 3276 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3277 ''' % { "reg" : reg } 3278 iop = InstObjParams(name, Name, 3279 "RegRegRegImmOp", 3280 { "code": eWalkCode, 3281 "r_count": rCount, 3282 "predicate_test": predicateTest, 3283 "op_class": opClass }, []) 3284 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3285 exec_output += NeonEqualRegExecute.subst(iop) 3286 for type in types: 3287 substDict = { "targs" : type, 3288 "class_name" : Name } 3289 exec_output += NeonExecDeclare.subst(substDict) 3290 3291 vextCode = ''' 3292 for (unsigned i = 0; i < eCount; i++) { 3293 unsigned index = i + imm; 3294 if (index < eCount) { 3295 destReg.elements[i] = srcReg1.elements[index]; 3296 } else { 3297 index -= eCount; 3298 if (index >= eCount) { 3299 if (FullSystem) 3300 fault = new UndefinedInstruction; 3301 else 3302 fault = new UndefinedInstruction(false, mnemonic); 3303 } else { 3304 destReg.elements[i] = srcReg2.elements[index]; 3305 } 3306 } 3307 } 3308 ''' 3309 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode) 3310 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode) 3311 3312 def buildVtbxl(name, Name, opClass, length, isVtbl): 3313 global header_output, decoder_output, exec_output 3314 
code = ''' 3315 union 3316 { 3317 uint8_t bytes[32]; 3318 FloatRegBits regs[8]; 3319 } table; 3320 3321 union 3322 { 3323 uint8_t bytes[8]; 3324 FloatRegBits regs[2]; 3325 } destReg, srcReg2; 3326 3327 const unsigned length = %(length)d; 3328 const bool isVtbl = %(isVtbl)s; 3329 3330 srcReg2.regs[0] = htog(FpOp2P0_uw); 3331 srcReg2.regs[1] = htog(FpOp2P1_uw); 3332 3333 destReg.regs[0] = htog(FpDestP0_uw); 3334 destReg.regs[1] = htog(FpDestP1_uw); 3335 ''' % { "length" : length, "isVtbl" : isVtbl } 3336 for reg in range(8): 3337 if reg < length * 2: 3338 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \ 3339 { "reg" : reg } 3340 else: 3341 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3342 code += ''' 3343 for (unsigned i = 0; i < sizeof(destReg); i++) { 3344 uint8_t index = srcReg2.bytes[i]; 3345 if (index < 8 * length) { 3346 destReg.bytes[i] = table.bytes[index]; 3347 } else { 3348 if (isVtbl) 3349 destReg.bytes[i] = 0; 3350 // else destReg.bytes[i] unchanged 3351 } 3352 } 3353 3354 FpDestP0_uw = gtoh(destReg.regs[0]); 3355 FpDestP1_uw = gtoh(destReg.regs[1]); 3356 ''' 3357 iop = InstObjParams(name, Name, 3358 "RegRegRegOp", 3359 { "code": code, 3360 "predicate_test": predicateTest, 3361 "op_class": opClass }, []) 3362 header_output += RegRegRegOpDeclare.subst(iop) 3363 decoder_output += RegRegRegOpConstructor.subst(iop) 3364 exec_output += PredOpExecute.subst(iop) 3365 3366 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true") 3367 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true") 3368 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true") 3369 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true") 3370 3371 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false") 3372 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false") 3373 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false") 3374 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false") 3375}}; 3376