neon.isa revision 7639:8c09b7ff5b57
// -*- mode:c++ -*-

// Copyright (c) 2010 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black

output header {{
    // Three-register decode: pick the unsigned element type from size
    // (0..3 -> uint8_t/uint16_t/uint32_t/uint64_t) and build Base<T>.
    // Any other size decodes to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, op2);
          case 3:
            return new Base<uint64_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed counterpart of decodeNeonUThreeUReg
    // (0..3 -> int8_t/int16_t/int32_t/int64_t).
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUReg(unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, op2);
          case 3:
            return new Base<int64_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Dispatch to the unsigned or signed three-register decoder
    // (element sizes 0..3) depending on notSigned.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
        }
    }

    // Three-register decode restricted to sizes 0..2 (uint8_t/uint16_t/
    // uint32_t); size 3 and above decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed counterpart of decodeNeonUThreeUSReg
    // (0..2 -> int8_t/int16_t/int32_t).
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSThreeUSReg(unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, op2);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, op2);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, op2);
          default:
            return new Unknown(machInst);
        }
    }

    // Dispatch to the unsigned or signed three-register decoder
    // (element sizes 0..2) depending on notSigned.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeUSReg<Base>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUSReg<Base>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Unsigned three-register decode, sizes 0..2, choosing BaseQ when q
    // is set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonUThreeUSReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonUThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Signed three-register decode, sizes 0..2, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeSReg(bool q, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonSThreeUSReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUSReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register decode, sizes 0..2, selecting signedness from
    // notSigned and BaseD/BaseQ from q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
    }

    // Unsigned three-register decode, sizes 0..3, choosing BaseQ when q
    // is set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonUThreeUReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonUThreeUReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Signed three-register decode, sizes 0..3, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSThreeReg(bool q, unsigned size,
                        ExtMachInst machInst, IntRegIndex dest,
                        IntRegIndex op1, IntRegIndex op2)
    {
        if (q) {
            return decodeNeonSThreeUReg<BaseQ>(
                    size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeUReg<BaseD>(
                    size, machInst, dest, op1, op2);
        }
    }

    // Three-register decode, sizes 0..3, selecting signedness from
    // notSigned and BaseD/BaseQ from q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
                         ExtMachInst machInst, IntRegIndex dest,
                         IntRegIndex op1, IntRegIndex op2)
    {
        if (notSigned) {
            return decodeNeonUThreeReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        } else {
            return decodeNeonSThreeReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, op2);
        }
    }

    // Two-register-plus-immediate decode with unsigned elements, sizes
    // 0..3; BaseQ is used when q is set, BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            switch (size) {
              case 0:
                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0:
                return new BaseD<uint8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseD<uint16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseD<uint32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseD<uint64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        }
    }

    // Signed counterpart of decodeNeonUTwoShiftReg.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            switch (size) {
              case 0:
                return new BaseQ<int8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseQ<int16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseQ<int32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseQ<int64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0:
                return new BaseD<int8_t>(machInst, dest, op1, imm);
              case 1:
                return new BaseD<int16_t>(machInst, dest, op1, imm);
              case 2:
                return new BaseD<int32_t>(machInst, dest, op1, imm);
              case 3:
                return new BaseD<int64_t>(machInst, dest, op1, imm);
              default:
                return new Unknown(machInst);
            }
        }
    }


    // Two-register-plus-immediate decode, sizes 0..3, selecting
    // signedness from notSigned and BaseD/BaseQ from q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (notSigned) {
            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
    }

    // Two-register-plus-immediate decode with unsigned elements, sizes
    // 0..2 only; other sizes decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1, imm);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1, imm);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Unsigned two-register-plus-immediate decode, sizes 0..2, choosing
    // BaseQ when q is set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            return decodeNeonUTwoShiftUSReg<BaseQ>(
                    size, machInst, dest, op1, imm);
        } else {
            return decodeNeonUTwoShiftUSReg<BaseD>(
                    size, machInst, dest, op1, imm);
        }
    }

    // Signed counterpart of decodeNeonUTwoShiftUSReg.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoShiftUSReg(unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1, imm);
          case 1:
            return new Base<int16_t>(machInst, dest, op1, imm);
          case 2:
            return new Base<int32_t>(machInst, dest, op1, imm);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed two-register-plus-immediate decode, sizes 0..2, choosing
    // BaseQ when q is set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoShiftSReg(bool q, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1, uint64_t imm)
    {
        if (q) {
            return decodeNeonSTwoShiftUSReg<BaseQ>(
                    size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftUSReg<BaseD>(
                    size, machInst, dest, op1, imm);
        }
    }

    // Two-register-plus-immediate decode, sizes 0..2, selecting
    // signedness from notSigned and BaseD/BaseQ from q.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
                             ExtMachInst machInst, IntRegIndex dest,
                             IntRegIndex op1, uint64_t imm)
    {
        if (notSigned) {
            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        } else {
            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1, imm);
        }
    }

    // Two-register decode (no immediate) with unsigned elements, sizes
    // 0..2 only; other sizes decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed counterpart of decodeNeonUTwoMiscUSReg.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUSReg(unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1);
          case 1:
            return new Base<int16_t>(machInst, dest, op1);
          case 2:
            return new Base<int32_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Unsigned two-register decode, sizes 0..2, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (q) {
            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Signed two-register decode, sizes 0..2, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscSReg(bool q, unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        if (q) {
            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register decode (no immediate) with unsigned elements, sizes
    // 0..3; other sizes decode to Unknown.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonUTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<uint8_t>(machInst, dest, op1);
          case 1:
            return new Base<uint16_t>(machInst, dest, op1);
          case 2:
            return new Base<uint32_t>(machInst, dest, op1);
          case 3:
            return new Base<uint64_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed counterpart of decodeNeonUTwoMiscUReg.
    template <template <typename T> class Base>
    StaticInstPtr
    decodeNeonSTwoMiscUReg(unsigned size,
                           ExtMachInst machInst, IntRegIndex dest,
                           IntRegIndex op1)
    {
        switch (size) {
          case 0:
            return new Base<int8_t>(machInst, dest, op1);
          case 1:
            return new Base<int16_t>(machInst, dest, op1);
          case 2:
            return new Base<int32_t>(machInst, dest, op1);
          case 3:
            return new Base<int64_t>(machInst, dest, op1);
          default:
            return new Unknown(machInst);
        }
    }

    // Signed two-register decode, sizes 0..3, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonSTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        if (q) {
            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Unsigned two-register decode, sizes 0..3, choosing BaseQ when q is
    // set and BaseD otherwise.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUTwoMiscReg(bool q, unsigned size,
                          ExtMachInst machInst, IntRegIndex dest,
                          IntRegIndex op1)
    {
        if (q) {
            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
        } else {
            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
        }
    }

    // Two-register decode (no immediate), sizes 0..2, selecting
    // signedness from notSigned and BaseD/BaseQ from q.
    //
    // This forwards to the TwoMisc helpers, whose parameter lists match
    // the four arguments passed here. The TwoShift helpers require an
    // additional imm argument and so cannot be called from this function.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ>
    StaticInstPtr
    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
                            ExtMachInst machInst, IntRegIndex dest,
                            IntRegIndex op1)
    {
        if (notSigned) {
            return decodeNeonUTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        } else {
            return decodeNeonSTwoMiscSReg<BaseD, BaseQ>(
                    q, size, machInst, dest, op1);
        }
    }

}};

output exec {{
    // Each comparison helper below returns 2.0 if either operand is a
    // signaling NaN (as reported by isSnan), otherwise 0.0 when the
    // comparison holds and 1.0 when it does not.

    // op1 > op2
    static float
    vcgtFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    // op1 >= op2
    static float
    vcgeFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    // op1 == op2
    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }

    // op1 <= op2
    static float
    vcleFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    // op1 < op2
    static float
    vcltFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }

    // |op1| > |op2|
    static float
    vacgtFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    // |op1| >= |op2|
    static float
    vacgeFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
}};

let {{

    header_output = ""
    exec_output = ""

    # Element type name tuples handed to the generators below as `types`.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes

    # Generate an instruction with two source vectors and a destination
    # vector that all span rCount registers.  `op` is a C++ snippet run
    # once per element with srcElem1, srcElem2 and destElem in scope.
    # With readDest the destination is loaded before `op` runs; with
    # pairwise the two inputs of element i are adjacent elements taken
    # from srcReg1 followed by srcReg2.  One NeonExecDeclare is emitted
    # per entry in `types`.
    def threeEqualRegInst(name, Name, types, rCount, op,
                          readDest=False, pairwise=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        if pairwise:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(srcReg1.elements[i]);
                Element srcElem2 = gtoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Floating point variant of threeEqualRegInst: `op` runs once per
    # register with srcReg1, srcReg2 and destReg in scope.  With toInt
    # the destination is a RegVect of FloatRegBits instead of an array
    # of FloatReg.
    # NOTE(review): with toInt and readDest both set, the generated read
    # is 'destReg = destRegs[r];' while destRegs is a RegVect, which is
    # accessed through .regs everywhere else -- confirm no caller uses
    # that combination.
    def threeEqualRegInstFp(name, Name, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        global header_output, exec_output
        eWalkCode = '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destReg = destRegs[r];'
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
        if toInt:
            destType = 'FloatRegBits'
            writeDest = 'destRegs.regs[r] = destReg;'
        if pairwise:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                FloatReg srcReg1 = (2 * r < rCount) ?
                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
                FloatReg srcReg2 = (2 * r < rCount) ?
                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        else:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                FloatReg srcReg1 = srcRegs1[r];
                FloatReg srcReg2 = srcRegs2[r];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Generate a three-operand instruction whose operands may differ in
    # width.  Each of bigSrc1/bigSrc2/bigDest switches the corresponding
    # operand from a RegVect read or written through 2 registers to a
    # BigRegVect using 4.
    # NOTE(review): srcElem2 is declared with src1Prefix; src2Prefix is
    # passed to the template but never referenced.  This only matters
    # when bigSrc1 != bigSrc2 -- confirm it is intended.
    def threeUnequalRegInst(name, Name, types, op,
                            bigSrc1, bigSrc2, bigDest, readDest):
        global header_output, exec_output
        src1Cnt = src2Cnt = destCnt = 2
        src1Prefix = src2Prefix = destPrefix = ''
        if bigSrc1:
            src1Cnt = 4
            src1Prefix = 'Big'
        if bigSrc2:
            src2Cnt = 4
            src2Prefix = 'Big'
        if bigDest:
            destCnt = 4
            destPrefix = 'Big'
        eWalkCode = '''
            %sRegVect srcReg1;
            %sRegVect srcReg2;
            %sRegVect destReg;
        ''' % (src1Prefix, src2Prefix, destPrefix)
        for reg in range(src1Cnt):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        for reg in range(src2Cnt):
            eWalkCode += '''
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(destCnt):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
            %(destPrefix)sElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode,
                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                "destPrefix" : destPrefix }
        for reg in range(destCnt):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Both sources big, destination normal.
    def threeRegNarrowInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            True, True, False, readDest)

    # Both sources normal, destination big.
    def threeRegLongInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            False, False, True, readDest)

    # First source and destination big, second source normal.
    def threeRegWideInst(name, Name, types, op, readDest=False):
        threeUnequalRegInst(name, Name, types, op,
                            True, False, True, readDest)

    # Generate an instruction that combines every element of the first
    # source with the single element of the second source selected by
    # imm.  The generated code asserts that imm is a valid element index.
    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        assert(imm >= 0 && imm < eCount);
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Like twoEqualRegInst, but the sources are read from 2 registers
    # each and the destination is a BigRegVect written through 4.
    def twoRegLongInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        rCount = 2
        eWalkCode = '''
        RegVect srcReg1, srcReg2;
        BigRegVect destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(2 * rCount):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        assert(imm >= 0 && imm < eCount);
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(2 * rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Floating point variant of twoEqualRegInst: `op` runs once per
    # register with srcReg1, srcReg2 (taken from srcRegs2[imm]) and
    # destReg in scope.
    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2, destRegs;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destReg = destRegs[i];'
        eWalkCode += '''
        assert(imm >= 0 && imm < rCount);
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Generate a one-source-plus-immediate instruction.  By default `op`
    # sees srcElem1 and destElem.  With fromInt the source is presented
    # as a whole register (FloatRegBits srcReg1); with toInt the
    # destination is a whole register (FloatRegBits destReg).
    # NOTE(review): the nesting of 'if toInt:' relative to 'if readDest:'
    # could not be recovered from the scraped text; it is written at the
    # same level here -- confirm against the repository copy.
    def twoRegShiftInst(name, Name, types, rCount, op,
                        readDest=False, toInt=False, fromInt=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcRegs1, destRegs;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        if toInt:
            readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
        if fromInt:
            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
        if toInt:
            declDest = 'FloatRegBits destReg;'
            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "readOp" : readOpCode,
                "declDest" : declDest,
                "readDest" : readDestCode,
                "op" : op,
                "writeDest" : writeDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # One-source-plus-immediate instruction that reads a BigRegVect
    # (4 registers) and writes a RegVect (2 registers).
    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        BigRegVect srcReg1;
        RegVect destReg;
        '''
        for reg in range(4):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(2):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(2):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # One-source-plus-immediate instruction that reads a RegVect
    # (2 registers) and writes a BigRegVect (4 registers).
    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1;
        BigRegVect destReg;
        '''
        for reg in range(2):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(4):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            # The previous value goes into the per-element temporary
            # destElem (declared in the loop below), as in the other
            # generators; destReg is the whole BigRegVect.
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(4):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # One-source instruction without an immediate.  The generated loop
    # declares j = i before `op` and stores destElem to elements[j], so
    # `op` may redirect the store by assigning j.
    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned j = i;
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[j] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # One-source instruction in which every iteration reads the same
    # source element, the one selected by imm.
    def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
        global header_output, exec_output
        eWalkCode = '''
        RegVect srcReg1, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                ''' % { "reg" : reg }
1227 readDestCode = '' 1228 if readDest: 1229 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1230 eWalkCode += op 1231 for reg in range(rCount): 1232 eWalkCode += ''' 1233 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1234 FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]); 1235 ''' % { "reg" : reg } 1236 iop = InstObjParams(name, Name, 1237 "RegRegOp", 1238 { "code": eWalkCode, 1239 "r_count": rCount, 1240 "predicate_test": predicateTest }, []) 1241 header_output += NeonRegRegOpDeclare.subst(iop) 1242 exec_output += NeonEqualRegExecute.subst(iop) 1243 for type in types: 1244 substDict = { "targs" : type, 1245 "class_name" : Name } 1246 exec_output += NeonExecDeclare.subst(substDict) 1247 1248 def twoRegMiscInstFp(name, Name, types, rCount, op, 1249 readDest=False, toInt=False): 1250 global header_output, exec_output 1251 eWalkCode = ''' 1252 typedef FloatReg FloatVect[rCount]; 1253 FloatVect srcRegs1; 1254 ''' 1255 if toInt: 1256 eWalkCode += 'RegVect destRegs;\n' 1257 else: 1258 eWalkCode += 'FloatVect destRegs;\n' 1259 for reg in range(rCount): 1260 eWalkCode += ''' 1261 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1262 ''' % { "reg" : reg } 1263 if readDest: 1264 if toInt: 1265 eWalkCode += ''' 1266 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1267 ''' % { "reg" : reg } 1268 else: 1269 eWalkCode += ''' 1270 destRegs[%(reg)d] = FpDestP%(reg)d; 1271 ''' % { "reg" : reg } 1272 readDestCode = '' 1273 if readDest: 1274 readDestCode = 'destReg = destRegs[i];' 1275 destType = 'FloatReg' 1276 writeDest = 'destRegs[r] = destReg;' 1277 if toInt: 1278 destType = 'FloatRegBits' 1279 writeDest = 'destRegs.regs[r] = destReg;' 1280 eWalkCode += ''' 1281 for (unsigned r = 0; r < rCount; r++) { 1282 FloatReg srcReg1 = srcRegs1[r]; 1283 %(destType)s destReg; 1284 %(readDest)s 1285 %(op)s 1286 %(writeDest)s 1287 } 1288 ''' % { "op" : op, 1289 "readDest" : readDestCode, 1290 "destType" : destType, 1291 "writeDest" : writeDest } 1292 for reg in range(rCount): 1293 if toInt: 1294 
eWalkCode += ''' 1295 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d]; 1296 ''' % { "reg" : reg } 1297 else: 1298 eWalkCode += ''' 1299 FpDestP%(reg)d = destRegs[%(reg)d]; 1300 ''' % { "reg" : reg } 1301 iop = InstObjParams(name, Name, 1302 "FpRegRegOp", 1303 { "code": eWalkCode, 1304 "r_count": rCount, 1305 "predicate_test": predicateTest }, []) 1306 header_output += NeonRegRegOpDeclare.subst(iop) 1307 exec_output += NeonEqualRegExecute.subst(iop) 1308 for type in types: 1309 substDict = { "targs" : type, 1310 "class_name" : Name } 1311 exec_output += NeonExecDeclare.subst(substDict) 1312 1313 def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False): 1314 global header_output, exec_output 1315 eWalkCode = ''' 1316 RegVect srcRegs; 1317 BigRegVect destReg; 1318 ''' 1319 for reg in range(rCount): 1320 eWalkCode += ''' 1321 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1322 ''' % { "reg" : reg } 1323 if readDest: 1324 eWalkCode += ''' 1325 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1326 ''' % { "reg" : reg } 1327 readDestCode = '' 1328 if readDest: 1329 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1330 eWalkCode += ''' 1331 for (unsigned i = 0; i < eCount / 2; i++) { 1332 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1333 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1334 BigElement destElem; 1335 %(readDest)s 1336 %(op)s 1337 destReg.elements[i] = htog(destElem); 1338 } 1339 ''' % { "op" : op, "readDest" : readDestCode } 1340 for reg in range(rCount): 1341 eWalkCode += ''' 1342 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1343 ''' % { "reg" : reg } 1344 iop = InstObjParams(name, Name, 1345 "RegRegOp", 1346 { "code": eWalkCode, 1347 "r_count": rCount, 1348 "predicate_test": predicateTest }, []) 1349 header_output += NeonRegRegOpDeclare.subst(iop) 1350 exec_output += NeonUnequalRegExecute.subst(iop) 1351 for type in types: 1352 substDict = { "targs" : type, 1353 "class_name" : Name } 1354 exec_output += 
NeonExecDeclare.subst(substDict) 1355 1356 def twoRegNarrowMiscInst(name, Name, types, op, readDest=False): 1357 global header_output, exec_output 1358 eWalkCode = ''' 1359 BigRegVect srcReg1; 1360 RegVect destReg; 1361 ''' 1362 for reg in range(4): 1363 eWalkCode += ''' 1364 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1365 ''' % { "reg" : reg } 1366 if readDest: 1367 for reg in range(2): 1368 eWalkCode += ''' 1369 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1370 ''' % { "reg" : reg } 1371 readDestCode = '' 1372 if readDest: 1373 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1374 eWalkCode += ''' 1375 for (unsigned i = 0; i < eCount; i++) { 1376 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1377 Element destElem; 1378 %(readDest)s 1379 %(op)s 1380 destReg.elements[i] = htog(destElem); 1381 } 1382 ''' % { "op" : op, "readDest" : readDestCode } 1383 for reg in range(2): 1384 eWalkCode += ''' 1385 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1386 ''' % { "reg" : reg } 1387 iop = InstObjParams(name, Name, 1388 "RegRegOp", 1389 { "code": eWalkCode, 1390 "r_count": 2, 1391 "predicate_test": predicateTest }, []) 1392 header_output += NeonRegRegOpDeclare.subst(iop) 1393 exec_output += NeonUnequalRegExecute.subst(iop) 1394 for type in types: 1395 substDict = { "targs" : type, 1396 "class_name" : Name } 1397 exec_output += NeonExecDeclare.subst(substDict) 1398 1399 def oneRegImmInst(name, Name, types, rCount, op, readDest=False): 1400 global header_output, exec_output 1401 eWalkCode = ''' 1402 RegVect destReg; 1403 ''' 1404 if readDest: 1405 for reg in range(rCount): 1406 eWalkCode += ''' 1407 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1408 ''' % { "reg" : reg } 1409 readDestCode = '' 1410 if readDest: 1411 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1412 eWalkCode += ''' 1413 for (unsigned i = 0; i < eCount; i++) { 1414 Element destElem; 1415 %(readDest)s 1416 %(op)s 1417 destReg.elements[i] = htog(destElem); 1418 } 1419 ''' % { 
"op" : op, "readDest" : readDestCode } 1420 for reg in range(rCount): 1421 eWalkCode += ''' 1422 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1423 ''' % { "reg" : reg } 1424 iop = InstObjParams(name, Name, 1425 "RegImmOp", 1426 { "code": eWalkCode, 1427 "r_count": rCount, 1428 "predicate_test": predicateTest }, []) 1429 header_output += NeonRegImmOpDeclare.subst(iop) 1430 exec_output += NeonEqualRegExecute.subst(iop) 1431 for type in types: 1432 substDict = { "targs" : type, 1433 "class_name" : Name } 1434 exec_output += NeonExecDeclare.subst(substDict) 1435 1436 def twoRegLongMiscInst(name, Name, types, op, readDest=False): 1437 global header_output, exec_output 1438 eWalkCode = ''' 1439 RegVect srcReg1; 1440 BigRegVect destReg; 1441 ''' 1442 for reg in range(2): 1443 eWalkCode += ''' 1444 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 1445 ''' % { "reg" : reg } 1446 if readDest: 1447 for reg in range(4): 1448 eWalkCode += ''' 1449 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw); 1450 ''' % { "reg" : reg } 1451 readDestCode = '' 1452 if readDest: 1453 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1454 eWalkCode += ''' 1455 for (unsigned i = 0; i < eCount; i++) { 1456 Element srcElem1 = gtoh(srcReg1.elements[i]); 1457 BigElement destElem; 1458 %(readDest)s 1459 %(op)s 1460 destReg.elements[i] = htog(destElem); 1461 } 1462 ''' % { "op" : op, "readDest" : readDestCode } 1463 for reg in range(4): 1464 eWalkCode += ''' 1465 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 1466 ''' % { "reg" : reg } 1467 iop = InstObjParams(name, Name, 1468 "RegRegOp", 1469 { "code": eWalkCode, 1470 "r_count": 2, 1471 "predicate_test": predicateTest }, []) 1472 header_output += NeonRegRegOpDeclare.subst(iop) 1473 exec_output += NeonUnequalRegExecute.subst(iop) 1474 for type in types: 1475 substDict = { "targs" : type, 1476 "class_name" : Name } 1477 exec_output += NeonExecDeclare.subst(substDict) 1478 1479 vhaddCode = ''' 1480 Element carryBit = 1481 (((unsigned)srcElem1 & 
0x1) + 1482 ((unsigned)srcElem2 & 0x1)) >> 1; 1483 // Use division instead of a shift to ensure the sign extension works 1484 // right. The compiler will figure out if it can be a shift. Mask the 1485 // inputs so they get truncated correctly. 1486 destElem = (((srcElem1 & ~(Element)1) / 2) + 1487 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1488 ''' 1489 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode) 1490 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode) 1491 1492 vrhaddCode = ''' 1493 Element carryBit = 1494 (((unsigned)srcElem1 & 0x1) + 1495 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 1496 // Use division instead of a shift to ensure the sign extension works 1497 // right. The compiler will figure out if it can be a shift. Mask the 1498 // inputs so they get truncated correctly. 1499 destElem = (((srcElem1 & ~(Element)1) / 2) + 1500 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1501 ''' 1502 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode) 1503 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode) 1504 1505 vhsubCode = ''' 1506 Element barrowBit = 1507 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 1508 // Use division instead of a shift to ensure the sign extension works 1509 // right. The compiler will figure out if it can be a shift. Mask the 1510 // inputs so they get truncated correctly. 
1511 destElem = (((srcElem1 & ~(Element)1) / 2) - 1512 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 1513 ''' 1514 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode) 1515 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode) 1516 1517 vandCode = ''' 1518 destElem = srcElem1 & srcElem2; 1519 ''' 1520 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode) 1521 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode) 1522 1523 vbicCode = ''' 1524 destElem = srcElem1 & ~srcElem2; 1525 ''' 1526 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode) 1527 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode) 1528 1529 vorrCode = ''' 1530 destElem = srcElem1 | srcElem2; 1531 ''' 1532 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode) 1533 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode) 1534 1535 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode) 1536 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode) 1537 1538 vornCode = ''' 1539 destElem = srcElem1 | ~srcElem2; 1540 ''' 1541 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode) 1542 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode) 1543 1544 veorCode = ''' 1545 destElem = srcElem1 ^ srcElem2; 1546 ''' 1547 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode) 1548 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode) 1549 1550 vbifCode = ''' 1551 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 1552 ''' 1553 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True) 1554 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True) 1555 vbitCode = ''' 1556 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 1557 ''' 1558 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True) 1559 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True) 1560 vbslCode = ''' 1561 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 1562 ''' 
threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)

vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)

vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
                  2, vaddCode, pairwise=True)
threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
                  4, vaddCode, pairwise=True)
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
# Add and keep only the high half of each sum.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
# As vaddhn, with half of the discarded range added first for rounding.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)

vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)

# Unsigned saturating add: a wrapped sum is smaller than an input.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)

# Signed saturating add: overflow happened when both inputs share a sign
# and the sum does not. A negative wrapped sum means the true result was
# too large, so the minimum value is turned into the maximum.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)

# Unsigned saturating subtract: a wrapped difference exceeds the minuend.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)

# Signed saturating subtract: overflow needs inputs of opposite sign and
# a result whose sign differs from the first input.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)

vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)

# Shift by a signed per-element amount taken from the low byte of
# srcElem2: negative amounts shift right, non-negative amounts shift left.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)

# As vshl, but right shifts add the last bit shifted out (rBit).
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)

# Unsigned saturating shift: a left shift saturates to all ones and sets
# QC when any set bit would be shifted out.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)

# Signed saturating shift: a left shift saturates when the bits shifted
# out, together with the new sign bit, are not all copies of the sign.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)

# Unsigned saturating rounding shift. A zero shift amount gets its own
# branch, like the sibling snippets, so the saturation test is never
# evaluated with a zero-width bit range at a shift equal to the type
# width.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)

# Signed saturating rounding shift.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)

# Absolute difference, accumulated into the old destination.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)

vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)

vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)

vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)

vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)

# Saturating doubling multiply accumulate long. With N-bit elements the
# doubled product only leaves the 2N-bit signed range when both inputs are
# the most negative value (2 * 2^(N-1) * 2^(N-1) = 2^(2N-1)); every other
# pair, including half of the most negative value times the most negative
# value (2^(2N-2)), is representable and must not saturate.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = (destElem < 0);
    destElem += midElem;
    bool negDest = (destElem < 0);
    bool negMid = (midElem < 0);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)

# Saturating doubling multiply subtract long; the product saturates under
# the same single condition as vqdmlal above.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = (destElem < 0);
    destElem -= midElem;
    bool negDest = (destElem < 0);
    bool posMid = (midElem > 0);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)

# Saturating doubling multiply long.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)

vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)

# Polynomial (carry-less) multiply: xor together the shifted copies of
# srcElem1 selected by the set bits of srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)

threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)

# Saturating doubling multiply returning the high half.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)

# Saturating rounding doubling multiply returning the high half. Even
# with the rounding constant added, only (most negative) * (most
# negative) exceeds the N-bit signed range after the shift; e.g. half of
# the most negative value times the most negative value gives 2^(N-2),
# which is representable and must be returned unchanged.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Saturate to the largest positive value.
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        smallSignedTypes, 4, vqrdmulhCode)

# Floating point max/min: processNans gets the first chance to produce
# the result; binaryOp only runs when it reports it did not.
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)

vminfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)

threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
                    2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
                    4, vmaxfpCode, pairwise=True)

threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
                    2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
                    4, vminfpCode, pairwise=True)

vaddfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)

threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
                    4, vaddfpCode, pairwise=True)

vsubfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
# Floating-point multiply through binaryOp() with VfpRoundNearest.
vmulfpCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)

# Floating-point multiply-accumulate: two separate binaryOp() steps, a
# multiply into `mid` followed by an add with the existing destReg.
vmlafpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)

# Floating-point multiply-subtract: computes destReg - (srcReg1 * srcReg2)
# as two binaryOp() steps.
vmlsfpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)

# Floating-point comparisons.  All five snippets share one shape: the
# comparison functor is run through binaryOp(); a result of 0 writes -1 to
# destReg (any other result writes 0), and a result of 2.0 additionally
# raises fpscr.ioc.  The functors themselves (vcgtFunc, vcgeFunc, ...) are
# defined outside this chunk.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
        2, vcgtfpCode, toInt = True)
threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
        4, vcgtfpCode, toInt = True)

vcgefpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
        2, vcgefpCode, toInt = True)
threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
        4, vcgefpCode, toInt = True)

vacgtfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
        2, vacgtfpCode, toInt = True)
threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
        4, vacgtfpCode, toInt = True)

vacgefpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
        2, vacgefpCode, toInt = True)
threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
        4, vacgefpCode, toInt = True)

vceqfpCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
        2, vceqfpCode, toInt = True)
threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
        4, vceqfpCode, toInt = True)

# Reciprocal and reciprocal-square-root step operations, delegated to the
# fpRecpsS / fpRSqrtsS functors via binaryOp().
vrecpsCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)

vrsqrtsCode = '''
    FPSCR fpscr = Fpscr;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)

# Floating-point absolute difference: subtract, then fabs() of the result.
vabdfpCode = '''
    FPSCR fpscr = Fpscr;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    Fpscr = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)

# The "two register" generators below reuse the multiply-accumulate and
# multiply-subtract snippets under "...s" class names (presumably the
# by-scalar forms; the generators are defined outside this chunk).
twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)

twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)

2339 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode) 2340 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode) 2341 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode) 2342 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode) 2343 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode) 2344 2345 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode) 2346 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True) 2347 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True) 2348 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode) 2349 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode) 2350 twoEqualRegInst("vqrdmulh", "VqrdmulhsD", 2351 smallSignedTypes, 2, vqrdmulhCode) 2352 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", 2353 smallSignedTypes, 4, vqrdmulhCode) 2354 2355 vshrCode = ''' 2356 if (imm >= sizeof(srcElem1) * 8) { 2357 if (srcElem1 < 0) 2358 destElem = -1; 2359 else 2360 destElem = 0; 2361 } else { 2362 destElem = srcElem1 >> imm; 2363 } 2364 ''' 2365 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode) 2366 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode) 2367 2368 vsraCode = ''' 2369 Element mid;; 2370 if (imm >= sizeof(srcElem1) * 8) { 2371 mid = (srcElem1 < 0) ? 
-1 : 0; 2372 } else { 2373 mid = srcElem1 >> imm; 2374 if (srcElem1 < 0 && mid >= 0) { 2375 mid |= -(mid & ((Element)1 << 2376 (sizeof(Element) * 8 - 1 - imm))); 2377 } 2378 } 2379 destElem += mid; 2380 ''' 2381 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True) 2382 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True) 2383 2384 vrshrCode = ''' 2385 if (imm > sizeof(srcElem1) * 8) { 2386 destElem = 0; 2387 } else if (imm) { 2388 Element rBit = bits(srcElem1, imm - 1); 2389 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2390 } else { 2391 destElem = srcElem1; 2392 } 2393 ''' 2394 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode) 2395 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode) 2396 2397 vrsraCode = ''' 2398 if (imm > sizeof(srcElem1) * 8) { 2399 destElem += 0; 2400 } else if (imm) { 2401 Element rBit = bits(srcElem1, imm - 1); 2402 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2403 } else { 2404 destElem += srcElem1; 2405 } 2406 ''' 2407 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True) 2408 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True) 2409 2410 vsriCode = ''' 2411 if (imm >= sizeof(Element) * 8) 2412 destElem = destElem; 2413 else 2414 destElem = (srcElem1 >> imm) | 2415 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2416 ''' 2417 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True) 2418 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True) 2419 2420 vshlCode = ''' 2421 if (imm >= sizeof(Element) * 8) 2422 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; 2423 else 2424 destElem = srcElem1 << imm; 2425 ''' 2426 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode) 2427 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode) 2428 2429 vsliCode = ''' 2430 if (imm >= sizeof(Element) * 8) 2431 destElem = destElem; 2432 else 2433 destElem = (srcElem1 << imm) | (destElem & mask(imm)); 2434 ''' 2435 twoRegShiftInst("vsli", 
"NVsliD", unsignedTypes, 2, vsliCode, True) 2436 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True) 2437 2438 vqshlCode = ''' 2439 FPSCR fpscr = (FPSCR)Fpscr; 2440 if (imm >= sizeof(Element) * 8) { 2441 if (srcElem1 != 0) { 2442 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2443 if (srcElem1 > 0) 2444 destElem = ~destElem; 2445 fpscr.qc = 1; 2446 } else { 2447 destElem = 0; 2448 } 2449 } else if (imm) { 2450 destElem = (srcElem1 << imm); 2451 uint64_t topBits = bits((uint64_t)srcElem1, 2452 sizeof(Element) * 8 - 1, 2453 sizeof(Element) * 8 - 1 - imm); 2454 if (topBits != 0 && topBits != mask(imm + 1)) { 2455 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2456 if (srcElem1 > 0) 2457 destElem = ~destElem; 2458 fpscr.qc = 1; 2459 } 2460 } else { 2461 destElem = srcElem1; 2462 } 2463 Fpscr = fpscr; 2464 ''' 2465 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode) 2466 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode) 2467 2468 vqshluCode = ''' 2469 FPSCR fpscr = (FPSCR)Fpscr; 2470 if (imm >= sizeof(Element) * 8) { 2471 if (srcElem1 != 0) { 2472 destElem = mask(sizeof(Element) * 8); 2473 fpscr.qc = 1; 2474 } else { 2475 destElem = 0; 2476 } 2477 } else if (imm) { 2478 destElem = (srcElem1 << imm); 2479 uint64_t topBits = bits((uint64_t)srcElem1, 2480 sizeof(Element) * 8 - 1, 2481 sizeof(Element) * 8 - imm); 2482 if (topBits != 0) { 2483 destElem = mask(sizeof(Element) * 8); 2484 fpscr.qc = 1; 2485 } 2486 } else { 2487 destElem = srcElem1; 2488 } 2489 Fpscr = fpscr; 2490 ''' 2491 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode) 2492 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode) 2493 2494 vqshlusCode = ''' 2495 FPSCR fpscr = (FPSCR)Fpscr; 2496 if (imm >= sizeof(Element) * 8) { 2497 if (srcElem1 < 0) { 2498 destElem = 0; 2499 fpscr.qc = 1; 2500 } else if (srcElem1 > 0) { 2501 destElem = mask(sizeof(Element) * 8); 2502 fpscr.qc = 1; 2503 } else { 2504 destElem = 0; 2505 } 2506 } 
else if (imm) { 2507 destElem = (srcElem1 << imm); 2508 uint64_t topBits = bits((uint64_t)srcElem1, 2509 sizeof(Element) * 8 - 1, 2510 sizeof(Element) * 8 - imm); 2511 if (srcElem1 < 0) { 2512 destElem = 0; 2513 fpscr.qc = 1; 2514 } else if (topBits != 0) { 2515 destElem = mask(sizeof(Element) * 8); 2516 fpscr.qc = 1; 2517 } 2518 } else { 2519 if (srcElem1 < 0) { 2520 fpscr.qc = 1; 2521 destElem = 0; 2522 } else { 2523 destElem = srcElem1; 2524 } 2525 } 2526 Fpscr = fpscr; 2527 ''' 2528 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode) 2529 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode) 2530 2531 vshrnCode = ''' 2532 if (imm >= sizeof(srcElem1) * 8) { 2533 destElem = 0; 2534 } else { 2535 destElem = srcElem1 >> imm; 2536 } 2537 ''' 2538 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode) 2539 2540 vrshrnCode = ''' 2541 if (imm > sizeof(srcElem1) * 8) { 2542 destElem = 0; 2543 } else if (imm) { 2544 Element rBit = bits(srcElem1, imm - 1); 2545 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2546 } else { 2547 destElem = srcElem1; 2548 } 2549 ''' 2550 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode) 2551 2552 vqshrnCode = ''' 2553 FPSCR fpscr = (FPSCR)Fpscr; 2554 if (imm > sizeof(srcElem1) * 8) { 2555 if (srcElem1 != 0 && srcElem1 != -1) 2556 fpscr.qc = 1; 2557 destElem = 0; 2558 } else if (imm) { 2559 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2560 mid |= -(mid & ((BigElement)1 << 2561 (sizeof(BigElement) * 8 - 1 - imm))); 2562 if (mid != (Element)mid) { 2563 destElem = mask(sizeof(Element) * 8 - 1); 2564 if (srcElem1 < 0) 2565 destElem = ~destElem; 2566 fpscr.qc = 1; 2567 } else { 2568 destElem = mid; 2569 } 2570 } else { 2571 destElem = srcElem1; 2572 } 2573 Fpscr = fpscr; 2574 ''' 2575 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode) 2576 2577 vqshrunCode = ''' 2578 FPSCR fpscr = (FPSCR)Fpscr; 2579 if (imm > sizeof(srcElem1) * 8) { 2580 if 
(srcElem1 != 0) 2581 fpscr.qc = 1; 2582 destElem = 0; 2583 } else if (imm) { 2584 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2585 if (mid != (Element)mid) { 2586 destElem = mask(sizeof(Element) * 8); 2587 fpscr.qc = 1; 2588 } else { 2589 destElem = mid; 2590 } 2591 } else { 2592 destElem = srcElem1; 2593 } 2594 Fpscr = fpscr; 2595 ''' 2596 twoRegNarrowShiftInst("vqshrun", "NVqshrun", 2597 smallUnsignedTypes, vqshrunCode) 2598 2599 vqshrunsCode = ''' 2600 FPSCR fpscr = (FPSCR)Fpscr; 2601 if (imm > sizeof(srcElem1) * 8) { 2602 if (srcElem1 != 0) 2603 fpscr.qc = 1; 2604 destElem = 0; 2605 } else if (imm) { 2606 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2607 if (bits(mid, sizeof(BigElement) * 8 - 1, 2608 sizeof(Element) * 8) != 0) { 2609 if (srcElem1 < 0) { 2610 destElem = 0; 2611 } else { 2612 destElem = mask(sizeof(Element) * 8); 2613 } 2614 fpscr.qc = 1; 2615 } else { 2616 destElem = mid; 2617 } 2618 } else { 2619 destElem = srcElem1; 2620 } 2621 Fpscr = fpscr; 2622 ''' 2623 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 2624 smallSignedTypes, vqshrunsCode) 2625 2626 vqrshrnCode = ''' 2627 FPSCR fpscr = (FPSCR)Fpscr; 2628 if (imm > sizeof(srcElem1) * 8) { 2629 if (srcElem1 != 0 && srcElem1 != -1) 2630 fpscr.qc = 1; 2631 destElem = 0; 2632 } else if (imm) { 2633 BigElement mid = (srcElem1 >> (imm - 1)); 2634 uint64_t rBit = mid & 0x1; 2635 mid >>= 1; 2636 mid |= -(mid & ((BigElement)1 << 2637 (sizeof(BigElement) * 8 - 1 - imm))); 2638 mid += rBit; 2639 if (mid != (Element)mid) { 2640 destElem = mask(sizeof(Element) * 8 - 1); 2641 if (srcElem1 < 0) 2642 destElem = ~destElem; 2643 fpscr.qc = 1; 2644 } else { 2645 destElem = mid; 2646 } 2647 } else { 2648 if (srcElem1 != (Element)srcElem1) { 2649 destElem = mask(sizeof(Element) * 8 - 1); 2650 if (srcElem1 < 0) 2651 destElem = ~destElem; 2652 fpscr.qc = 1; 2653 } else { 2654 destElem = srcElem1; 2655 } 2656 } 2657 Fpscr = fpscr; 2658 ''' 2659 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 2660 smallSignedTypes, 
vqrshrnCode) 2661 2662 vqrshrunCode = ''' 2663 FPSCR fpscr = (FPSCR)Fpscr; 2664 if (imm > sizeof(srcElem1) * 8) { 2665 if (srcElem1 != 0) 2666 fpscr.qc = 1; 2667 destElem = 0; 2668 } else if (imm) { 2669 BigElement mid = (srcElem1 >> (imm - 1)); 2670 uint64_t rBit = mid & 0x1; 2671 mid >>= 1; 2672 mid += rBit; 2673 if (mid != (Element)mid) { 2674 destElem = mask(sizeof(Element) * 8); 2675 fpscr.qc = 1; 2676 } else { 2677 destElem = mid; 2678 } 2679 } else { 2680 if (srcElem1 != (Element)srcElem1) { 2681 destElem = mask(sizeof(Element) * 8 - 1); 2682 if (srcElem1 < 0) 2683 destElem = ~destElem; 2684 fpscr.qc = 1; 2685 } else { 2686 destElem = srcElem1; 2687 } 2688 } 2689 Fpscr = fpscr; 2690 ''' 2691 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 2692 smallUnsignedTypes, vqrshrunCode) 2693 2694 vqrshrunsCode = ''' 2695 FPSCR fpscr = (FPSCR)Fpscr; 2696 if (imm > sizeof(srcElem1) * 8) { 2697 if (srcElem1 != 0) 2698 fpscr.qc = 1; 2699 destElem = 0; 2700 } else if (imm) { 2701 BigElement mid = (srcElem1 >> (imm - 1)); 2702 uint64_t rBit = mid & 0x1; 2703 mid >>= 1; 2704 mid |= -(mid & ((BigElement)1 << 2705 (sizeof(BigElement) * 8 - 1 - imm))); 2706 mid += rBit; 2707 if (bits(mid, sizeof(BigElement) * 8 - 1, 2708 sizeof(Element) * 8) != 0) { 2709 if (srcElem1 < 0) { 2710 destElem = 0; 2711 } else { 2712 destElem = mask(sizeof(Element) * 8); 2713 } 2714 fpscr.qc = 1; 2715 } else { 2716 destElem = mid; 2717 } 2718 } else { 2719 if (srcElem1 < 0) { 2720 fpscr.qc = 1; 2721 destElem = 0; 2722 } else { 2723 destElem = srcElem1; 2724 } 2725 } 2726 Fpscr = fpscr; 2727 ''' 2728 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 2729 smallSignedTypes, vqrshrunsCode) 2730 2731 vshllCode = ''' 2732 if (imm >= sizeof(destElem) * 8) { 2733 destElem = 0; 2734 } else { 2735 destElem = (BigElement)srcElem1 << imm; 2736 } 2737 ''' 2738 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode) 2739 2740 vmovlCode = ''' 2741 destElem = srcElem1; 2742 ''' 2743 twoRegLongShiftInst("vmovl", 
"NVmovl", smallTypes, vmovlCode) 2744 2745 vcvt2ufxCode = ''' 2746 FPSCR fpscr = Fpscr; 2747 if (flushToZero(srcElem1)) 2748 fpscr.idc = 1; 2749 VfpSavedState state = prepFpState(VfpRoundNearest); 2750 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2751 destReg = vfpFpSToFixed(srcElem1, false, false, imm); 2752 __asm__ __volatile__("" :: "m" (destReg)); 2753 finishVfp(fpscr, state, true); 2754 Fpscr = fpscr; 2755 ''' 2756 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",), 2757 2, vcvt2ufxCode, toInt = True) 2758 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",), 2759 4, vcvt2ufxCode, toInt = True) 2760 2761 vcvt2sfxCode = ''' 2762 FPSCR fpscr = Fpscr; 2763 if (flushToZero(srcElem1)) 2764 fpscr.idc = 1; 2765 VfpSavedState state = prepFpState(VfpRoundNearest); 2766 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 2767 destReg = vfpFpSToFixed(srcElem1, true, false, imm); 2768 __asm__ __volatile__("" :: "m" (destReg)); 2769 finishVfp(fpscr, state, true); 2770 Fpscr = fpscr; 2771 ''' 2772 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",), 2773 2, vcvt2sfxCode, toInt = True) 2774 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",), 2775 4, vcvt2sfxCode, toInt = True) 2776 2777 vcvtu2fpCode = ''' 2778 FPSCR fpscr = Fpscr; 2779 VfpSavedState state = prepFpState(VfpRoundNearest); 2780 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2781 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); 2782 __asm__ __volatile__("" :: "m" (destElem)); 2783 finishVfp(fpscr, state, true); 2784 Fpscr = fpscr; 2785 ''' 2786 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",), 2787 2, vcvtu2fpCode, fromInt = True) 2788 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",), 2789 4, vcvtu2fpCode, fromInt = True) 2790 2791 vcvts2fpCode = ''' 2792 FPSCR fpscr = Fpscr; 2793 VfpSavedState state = prepFpState(VfpRoundNearest); 2794 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 2795 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); 2796 __asm__ 
__volatile__("" :: "m" (destElem)); 2797 finishVfp(fpscr, state, true); 2798 Fpscr = fpscr; 2799 ''' 2800 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",), 2801 2, vcvts2fpCode, fromInt = True) 2802 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",), 2803 4, vcvts2fpCode, fromInt = True) 2804 2805 vcvts2hCode = ''' 2806 FPSCR fpscr = Fpscr; 2807 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 2808 if (flushToZero(srcFp1)) 2809 fpscr.idc = 1; 2810 VfpSavedState state = prepFpState(VfpRoundNearest); 2811 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 2812 : "m" (srcFp1), "m" (destElem)); 2813 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 2814 fpscr.ahp, srcFp1); 2815 __asm__ __volatile__("" :: "m" (destElem)); 2816 finishVfp(fpscr, state, true); 2817 Fpscr = fpscr; 2818 ''' 2819 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode) 2820 2821 vcvth2sCode = ''' 2822 FPSCR fpscr = Fpscr; 2823 VfpSavedState state = prepFpState(VfpRoundNearest); 2824 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 2825 : "m" (srcElem1), "m" (destElem)); 2826 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 2827 __asm__ __volatile__("" :: "m" (destElem)); 2828 finishVfp(fpscr, state, true); 2829 Fpscr = fpscr; 2830 ''' 2831 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode) 2832 2833 vrsqrteCode = ''' 2834 destElem = unsignedRSqrtEstimate(srcElem1); 2835 ''' 2836 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode) 2837 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode) 2838 2839 vrsqrtefpCode = ''' 2840 FPSCR fpscr = Fpscr; 2841 if (flushToZero(srcReg1)) 2842 fpscr.idc = 1; 2843 destReg = fprSqrtEstimate(fpscr, srcReg1); 2844 Fpscr = fpscr; 2845 ''' 2846 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode) 2847 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode) 2848 2849 vrecpeCode = ''' 2850 destElem = 
unsignedRecipEstimate(srcElem1); 2851 ''' 2852 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode) 2853 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode) 2854 2855 vrecpefpCode = ''' 2856 FPSCR fpscr = Fpscr; 2857 if (flushToZero(srcReg1)) 2858 fpscr.idc = 1; 2859 destReg = fpRecipEstimate(fpscr, srcReg1); 2860 Fpscr = fpscr; 2861 ''' 2862 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode) 2863 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode) 2864 2865 vrev16Code = ''' 2866 destElem = srcElem1; 2867 unsigned groupSize = ((1 << 1) / sizeof(Element)); 2868 unsigned reverseMask = (groupSize - 1); 2869 j = i ^ reverseMask; 2870 ''' 2871 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code) 2872 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code) 2873 vrev32Code = ''' 2874 destElem = srcElem1; 2875 unsigned groupSize = ((1 << 2) / sizeof(Element)); 2876 unsigned reverseMask = (groupSize - 1); 2877 j = i ^ reverseMask; 2878 ''' 2879 twoRegMiscInst("vrev32", "NVrev32D", 2880 ("uint8_t", "uint16_t"), 2, vrev32Code) 2881 twoRegMiscInst("vrev32", "NVrev32Q", 2882 ("uint8_t", "uint16_t"), 4, vrev32Code) 2883 vrev64Code = ''' 2884 destElem = srcElem1; 2885 unsigned groupSize = ((1 << 3) / sizeof(Element)); 2886 unsigned reverseMask = (groupSize - 1); 2887 j = i ^ reverseMask; 2888 ''' 2889 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code) 2890 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code) 2891 2892 vpaddlCode = ''' 2893 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2894 ''' 2895 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode) 2896 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode) 2897 2898 vpadalCode = ''' 2899 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 2900 ''' 2901 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True) 2902 twoRegCondenseInst("vpadal", 
"NVpadalQ", smallTypes, 4, vpadalCode, True) 2903 2904 vclsCode = ''' 2905 unsigned count = 0; 2906 if (srcElem1 < 0) { 2907 srcElem1 <<= 1; 2908 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 2909 count++; 2910 srcElem1 <<= 1; 2911 } 2912 } else { 2913 srcElem1 <<= 1; 2914 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 2915 count++; 2916 srcElem1 <<= 1; 2917 } 2918 } 2919 destElem = count; 2920 ''' 2921 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode) 2922 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode) 2923 2924 vclzCode = ''' 2925 unsigned count = 0; 2926 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 2927 count++; 2928 srcElem1 <<= 1; 2929 } 2930 destElem = count; 2931 ''' 2932 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode) 2933 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode) 2934 2935 vcntCode = ''' 2936 unsigned count = 0; 2937 while (srcElem1 && count < sizeof(Element) * 8) { 2938 count += srcElem1 & 0x1; 2939 srcElem1 >>= 1; 2940 } 2941 destElem = count; 2942 ''' 2943 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode) 2944 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode) 2945 2946 vmvnCode = ''' 2947 destElem = ~srcElem1; 2948 ''' 2949 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode) 2950 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode) 2951 2952 vqabsCode = ''' 2953 FPSCR fpscr = (FPSCR)Fpscr; 2954 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2955 fpscr.qc = 1; 2956 destElem = ~srcElem1; 2957 } else if (srcElem1 < 0) { 2958 destElem = -srcElem1; 2959 } else { 2960 destElem = srcElem1; 2961 } 2962 Fpscr = fpscr; 2963 ''' 2964 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode) 2965 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode) 2966 2967 vqnegCode = ''' 2968 FPSCR fpscr = (FPSCR)Fpscr; 2969 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2970 fpscr.qc = 1; 2971 
destElem = ~srcElem1; 2972 } else { 2973 destElem = -srcElem1; 2974 } 2975 Fpscr = fpscr; 2976 ''' 2977 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode) 2978 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode) 2979 2980 vabsCode = ''' 2981 if (srcElem1 < 0) { 2982 destElem = -srcElem1; 2983 } else { 2984 destElem = srcElem1; 2985 } 2986 ''' 2987 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode) 2988 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode) 2989 vabsfpCode = ''' 2990 union 2991 { 2992 uint32_t i; 2993 float f; 2994 } cStruct; 2995 cStruct.f = srcReg1; 2996 cStruct.i &= mask(sizeof(Element) * 8 - 1); 2997 destReg = cStruct.f; 2998 ''' 2999 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode) 3000 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode) 3001 3002 vnegCode = ''' 3003 destElem = -srcElem1; 3004 ''' 3005 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode) 3006 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode) 3007 vnegfpCode = ''' 3008 destReg = -srcReg1; 3009 ''' 3010 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode) 3011 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode) 3012 3013 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3014 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode) 3015 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode) 3016 vcgtfpCode = ''' 3017 FPSCR fpscr = (FPSCR)Fpscr; 3018 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, 3019 true, true, VfpRoundNearest); 3020 destReg = (res == 0) ? -1 : 0; 3021 if (res == 2.0) 3022 fpscr.ioc = 1; 3023 Fpscr = fpscr; 3024 ''' 3025 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",), 3026 2, vcgtfpCode, toInt = True) 3027 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",), 3028 4, vcgtfpCode, toInt = True) 3029 3030 vcgeCode = 'destElem = (srcElem1 >= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3031 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode) 3032 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode) 3033 vcgefpCode = ''' 3034 FPSCR fpscr = (FPSCR)Fpscr; 3035 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, 3036 true, true, VfpRoundNearest); 3037 destReg = (res == 0) ? -1 : 0; 3038 if (res == 2.0) 3039 fpscr.ioc = 1; 3040 Fpscr = fpscr; 3041 ''' 3042 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",), 3043 2, vcgefpCode, toInt = True) 3044 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",), 3045 4, vcgefpCode, toInt = True) 3046 3047 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3048 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode) 3049 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode) 3050 vceqfpCode = ''' 3051 FPSCR fpscr = (FPSCR)Fpscr; 3052 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, 3053 true, true, VfpRoundNearest); 3054 destReg = (res == 0) ? -1 : 0; 3055 if (res == 2.0) 3056 fpscr.ioc = 1; 3057 Fpscr = fpscr; 3058 ''' 3059 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",), 3060 2, vceqfpCode, toInt = True) 3061 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",), 3062 4, vceqfpCode, toInt = True) 3063 3064 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;' 3065 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode) 3066 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode) 3067 vclefpCode = ''' 3068 FPSCR fpscr = (FPSCR)Fpscr; 3069 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, 3070 true, true, VfpRoundNearest); 3071 destReg = (res == 0) ? -1 : 0; 3072 if (res == 2.0) 3073 fpscr.ioc = 1; 3074 Fpscr = fpscr; 3075 ''' 3076 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",), 3077 2, vclefpCode, toInt = True) 3078 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",), 3079 4, vclefpCode, toInt = True) 3080 3081 vcltCode = 'destElem = (srcElem1 < 0) ? 
mask(sizeof(Element) * 8) : 0;' 3082 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode) 3083 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode) 3084 vcltfpCode = ''' 3085 FPSCR fpscr = (FPSCR)Fpscr; 3086 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, 3087 true, true, VfpRoundNearest); 3088 destReg = (res == 0) ? -1 : 0; 3089 if (res == 2.0) 3090 fpscr.ioc = 1; 3091 Fpscr = fpscr; 3092 ''' 3093 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",), 3094 2, vcltfpCode, toInt = True) 3095 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",), 3096 4, vcltfpCode, toInt = True) 3097 3098 vswpCode = ''' 3099 FloatRegBits mid; 3100 for (unsigned r = 0; r < rCount; r++) { 3101 mid = srcReg1.regs[r]; 3102 srcReg1.regs[r] = destReg.regs[r]; 3103 destReg.regs[r] = mid; 3104 } 3105 ''' 3106 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode) 3107 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode) 3108 3109 vtrnCode = ''' 3110 Element mid; 3111 for (unsigned i = 0; i < eCount; i += 2) { 3112 mid = srcReg1.elements[i]; 3113 srcReg1.elements[i] = destReg.elements[i + 1]; 3114 destReg.elements[i + 1] = mid; 3115 } 3116 ''' 3117 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode) 3118 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode) 3119 3120 vuzpCode = ''' 3121 Element mid[eCount]; 3122 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3123 for (unsigned i = 0; i < eCount / 2; i++) { 3124 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3125 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3126 destReg.elements[i] = destReg.elements[2 * i]; 3127 } 3128 for (unsigned i = 0; i < eCount / 2; i++) { 3129 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3130 } 3131 ''' 3132 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode) 3133 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode) 3134 3135 vzipCode = ''' 3136 Element mid[eCount]; 3137 memcpy(&mid, &destReg, sizeof(destReg)); 3138 for 
(unsigned i = 0; i < eCount / 2; i++) { 3139 destReg.elements[2 * i] = mid[i]; 3140 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3141 } 3142 for (int i = 0; i < eCount / 2; i++) { 3143 srcReg1.elements[2 * i] = mid[eCount / 2 + i]; 3144 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3145 } 3146 ''' 3147 twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode) 3148 twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode) 3149 3150 vmovnCode = 'destElem = srcElem1;' 3151 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode) 3152 3153 vdupCode = 'destElem = srcElem1;' 3154 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode) 3155 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode) 3156 3157 def vdupGprInst(name, Name, types, rCount): 3158 global header_output, exec_output 3159 eWalkCode = ''' 3160 RegVect destReg; 3161 for (unsigned i = 0; i < eCount; i++) { 3162 destReg.elements[i] = htog((Element)Op1); 3163 } 3164 ''' 3165 for reg in range(rCount): 3166 eWalkCode += ''' 3167 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3168 ''' % { "reg" : reg } 3169 iop = InstObjParams(name, Name, 3170 "RegRegOp", 3171 { "code": eWalkCode, 3172 "r_count": rCount, 3173 "predicate_test": predicateTest }, []) 3174 header_output += NeonRegRegOpDeclare.subst(iop) 3175 exec_output += NeonEqualRegExecute.subst(iop) 3176 for type in types: 3177 substDict = { "targs" : type, 3178 "class_name" : Name } 3179 exec_output += NeonExecDeclare.subst(substDict) 3180 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2) 3181 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4) 3182 3183 vmovCode = 'destElem = imm;' 3184 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode) 3185 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode) 3186 3187 vorrCode = 'destElem |= imm;' 3188 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True) 3189 oneRegImmInst("vorr", "NVorriQ", 
("uint64_t",), 4, vorrCode, True) 3190 3191 vmvnCode = 'destElem = ~imm;' 3192 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode) 3193 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode) 3194 3195 vbicCode = 'destElem &= ~imm;' 3196 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True) 3197 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True) 3198 3199 vqmovnCode = ''' 3200 FPSCR fpscr = (FPSCR)Fpscr; 3201 destElem = srcElem1; 3202 if ((BigElement)destElem != srcElem1) { 3203 fpscr.qc = 1; 3204 destElem = mask(sizeof(Element) * 8 - 1); 3205 if (srcElem1 < 0) 3206 destElem = ~destElem; 3207 } 3208 Fpscr = fpscr; 3209 ''' 3210 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode) 3211 3212 vqmovunCode = ''' 3213 FPSCR fpscr = (FPSCR)Fpscr; 3214 destElem = srcElem1; 3215 if ((BigElement)destElem != srcElem1) { 3216 fpscr.qc = 1; 3217 destElem = mask(sizeof(Element) * 8); 3218 } 3219 Fpscr = fpscr; 3220 ''' 3221 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3222 smallUnsignedTypes, vqmovunCode) 3223 3224 vqmovunsCode = ''' 3225 FPSCR fpscr = (FPSCR)Fpscr; 3226 destElem = srcElem1; 3227 if (srcElem1 < 0 || 3228 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3229 fpscr.qc = 1; 3230 destElem = mask(sizeof(Element) * 8); 3231 if (srcElem1 < 0) 3232 destElem = ~destElem; 3233 } 3234 Fpscr = fpscr; 3235 ''' 3236 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3237 smallSignedTypes, vqmovunsCode) 3238 3239 def buildVext(name, Name, types, rCount, op): 3240 global header_output, exec_output 3241 eWalkCode = ''' 3242 RegVect srcReg1, srcReg2, destReg; 3243 ''' 3244 for reg in range(rCount): 3245 eWalkCode += ''' 3246 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw); 3247 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw); 3248 ''' % { "reg" : reg } 3249 eWalkCode += op 3250 for reg in range(rCount): 3251 eWalkCode += ''' 3252 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]); 3253 ''' % { "reg" : reg } 3254 
iop = InstObjParams(name, Name, 3255 "RegRegRegImmOp", 3256 { "code": eWalkCode, 3257 "r_count": rCount, 3258 "predicate_test": predicateTest }, []) 3259 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3260 exec_output += NeonEqualRegExecute.subst(iop) 3261 for type in types: 3262 substDict = { "targs" : type, 3263 "class_name" : Name } 3264 exec_output += NeonExecDeclare.subst(substDict) 3265 3266 vextCode = ''' 3267 for (unsigned i = 0; i < eCount; i++) { 3268 unsigned index = i + imm; 3269 if (index < eCount) { 3270 destReg.elements[i] = srcReg1.elements[index]; 3271 } else { 3272 index -= eCount; 3273 assert(index < eCount); 3274 destReg.elements[i] = srcReg2.elements[index]; 3275 } 3276 } 3277 ''' 3278 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode) 3279 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode) 3280 3281 def buildVtbxl(name, Name, length, isVtbl): 3282 global header_output, decoder_output, exec_output 3283 code = ''' 3284 union 3285 { 3286 uint8_t bytes[32]; 3287 FloatRegBits regs[8]; 3288 } table; 3289 3290 union 3291 { 3292 uint8_t bytes[8]; 3293 FloatRegBits regs[2]; 3294 } destReg, srcReg2; 3295 3296 const unsigned length = %(length)d; 3297 const bool isVtbl = %(isVtbl)s; 3298 3299 srcReg2.regs[0] = htog(FpOp2P0.uw); 3300 srcReg2.regs[1] = htog(FpOp2P1.uw); 3301 3302 destReg.regs[0] = htog(FpDestP0.uw); 3303 destReg.regs[1] = htog(FpDestP1.uw); 3304 ''' % { "length" : length, "isVtbl" : isVtbl } 3305 for reg in range(8): 3306 if reg < length * 2: 3307 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \ 3308 { "reg" : reg } 3309 else: 3310 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3311 code += ''' 3312 for (unsigned i = 0; i < sizeof(destReg); i++) { 3313 uint8_t index = srcReg2.bytes[i]; 3314 if (index < 8 * length) { 3315 destReg.bytes[i] = table.bytes[index]; 3316 } else { 3317 if (isVtbl) 3318 destReg.bytes[i] = 0; 3319 // else destReg.bytes[i] unchanged 3320 } 3321 } 3322 3323 FpDestP0.uw = 
gtoh(destReg.regs[0]); 3324 FpDestP1.uw = gtoh(destReg.regs[1]); 3325 ''' 3326 iop = InstObjParams(name, Name, 3327 "RegRegRegOp", 3328 { "code": code, 3329 "predicate_test": predicateTest }, []) 3330 header_output += RegRegRegOpDeclare.subst(iop) 3331 decoder_output += RegRegRegOpConstructor.subst(iop) 3332 exec_output += PredOpExecute.subst(iop) 3333 3334 buildVtbxl("vtbl", "NVtbl1", 1, "true") 3335 buildVtbxl("vtbl", "NVtbl2", 2, "true") 3336 buildVtbxl("vtbl", "NVtbl3", 3, "true") 3337 buildVtbxl("vtbl", "NVtbl4", 4, "true") 3338 3339 buildVtbxl("vtbx", "NVtbx1", 1, "false") 3340 buildVtbxl("vtbx", "NVtbx2", 2, "false") 3341 buildVtbxl("vtbx", "NVtbx3", 3, "false") 3342 buildVtbxl("vtbx", "NVtbx4", 4, "false") 3343}}; 3344