1// -*- mode:c++ -*- 2 3// Copyright (c) 2010-2011, 2015, 2019 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating 9// to a hardware implementation of the functionality of the software 10// licensed hereunder. You may use the software subject to the license 11// terms below provided that you ensure that this notice is replicated 12// unmodified and in its entirety in all distributions of the software, 13// modified or unmodified, in source code or in binary form. 14// 15// Redistribution and use in source and binary forms, with or without 16// modification, are permitted provided that the following conditions are 17// met: redistributions of source code must retain the above copyright 18// notice, this list of conditions and the following disclaimer; 19// redistributions in binary form must reproduce the above copyright 20// notice, this list of conditions and the following disclaimer in the 21// documentation and/or other materials provided with the distribution; 22// neither the name of the copyright holders nor the names of its 23// contributors may be used to endorse or promote products derived from 24// this software without specific prior written permission. 25// 26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Gabe Black 39 40output header {{ 41 template <template <typename T> class Base> 42 StaticInstPtr 43 decodeNeonUThreeUReg(unsigned size, 44 ExtMachInst machInst, IntRegIndex dest, 45 IntRegIndex op1, IntRegIndex op2) 46 { 47 switch (size) { 48 case 0: 49 return new Base<uint8_t>(machInst, dest, op1, op2); 50 case 1: 51 return new Base<uint16_t>(machInst, dest, op1, op2); 52 case 2: 53 return new Base<uint32_t>(machInst, dest, op1, op2); 54 case 3: 55 return new Base<uint64_t>(machInst, dest, op1, op2); 56 default: 57 return new Unknown(machInst); 58 } 59 } 60 61 template <class BaseS, class BaseD> 62 StaticInstPtr 63 decodeNeonSizeSingleDouble(unsigned size, 64 ExtMachInst machInst, IntRegIndex dest, 65 IntRegIndex op1, IntRegIndex op2) 66 { 67 switch (size) { 68 case 2: 69 return new BaseS(machInst, dest, op1, op2); 70 case 3: 71 return new BaseD(machInst, dest, op1, op2); 72 default: 73 return new Unknown(machInst); 74 } 75 } 76 77 template <template <typename T> class Base> 78 StaticInstPtr 79 decodeNeonSThreeUReg(unsigned size, 80 ExtMachInst machInst, IntRegIndex dest, 81 IntRegIndex op1, IntRegIndex op2) 82 { 83 switch (size) { 84 case 0: 85 return new Base<int8_t>(machInst, dest, op1, op2); 86 case 1: 87 return new Base<int16_t>(machInst, dest, op1, op2); 88 case 2: 89 return new Base<int32_t>(machInst, dest, op1, op2); 90 case 3: 91 return new Base<int64_t>(machInst, dest, op1, op2); 92 default: 93 
return new Unknown(machInst); 94 } 95 } 96 97 template <template <typename T> class Base> 98 StaticInstPtr 99 decodeNeonUSThreeUReg(bool notSigned, unsigned size, 100 ExtMachInst machInst, IntRegIndex dest, 101 IntRegIndex op1, IntRegIndex op2) 102 { 103 if (notSigned) { 104 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); 105 } else { 106 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); 107 } 108 } 109 110 template <template <typename T> class Base> 111 StaticInstPtr 112 decodeNeonUThreeUSReg(unsigned size, 113 ExtMachInst machInst, IntRegIndex dest, 114 IntRegIndex op1, IntRegIndex op2) 115 { 116 switch (size) { 117 case 0: 118 return new Base<uint8_t>(machInst, dest, op1, op2); 119 case 1: 120 return new Base<uint16_t>(machInst, dest, op1, op2); 121 case 2: 122 return new Base<uint32_t>(machInst, dest, op1, op2); 123 default: 124 return new Unknown(machInst); 125 } 126 } 127 128 template <template <typename T> class Base> 129 StaticInstPtr 130 decodeNeonSThreeUSReg(unsigned size, 131 ExtMachInst machInst, IntRegIndex dest, 132 IntRegIndex op1, IntRegIndex op2) 133 { 134 switch (size) { 135 case 0: 136 return new Base<int8_t>(machInst, dest, op1, op2); 137 case 1: 138 return new Base<int16_t>(machInst, dest, op1, op2); 139 case 2: 140 return new Base<int32_t>(machInst, dest, op1, op2); 141 default: 142 return new Unknown(machInst); 143 } 144 } 145 146 template <template <typename T> class Base> 147 StaticInstPtr 148 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, 149 IntRegIndex dest, IntRegIndex op1, 150 IntRegIndex op2) 151 { 152 switch (size) { 153 case 1: 154 return new Base<int16_t>(machInst, dest, op1, op2); 155 case 2: 156 return new Base<int32_t>(machInst, dest, op1, op2); 157 default: 158 return new Unknown(machInst); 159 } 160 } 161 162 template <template <typename T> class Base> 163 StaticInstPtr 164 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, 165 IntRegIndex dest, IntRegIndex op1, 
166 IntRegIndex op2, uint64_t imm) 167 { 168 switch (size) { 169 case 1: 170 return new Base<int16_t>(machInst, dest, op1, op2, imm); 171 case 2: 172 return new Base<int32_t>(machInst, dest, op1, op2, imm); 173 default: 174 return new Unknown(machInst); 175 } 176 } 177 178 template <template <typename T> class Base> 179 StaticInstPtr 180 decodeNeonUSThreeUSReg(bool notSigned, unsigned size, 181 ExtMachInst machInst, IntRegIndex dest, 182 IntRegIndex op1, IntRegIndex op2) 183 { 184 if (notSigned) { 185 return decodeNeonUThreeUSReg<Base>( 186 size, machInst, dest, op1, op2); 187 } else { 188 return decodeNeonSThreeUSReg<Base>( 189 size, machInst, dest, op1, op2); 190 } 191 } 192 193 template <template <typename T> class BaseD, 194 template <typename T> class BaseQ> 195 StaticInstPtr 196 decodeNeonUThreeSReg(bool q, unsigned size, 197 ExtMachInst machInst, IntRegIndex dest, 198 IntRegIndex op1, IntRegIndex op2) 199 { 200 if (q) { 201 return decodeNeonUThreeUSReg<BaseQ>( 202 size, machInst, dest, op1, op2); 203 } else { 204 return decodeNeonUThreeUSReg<BaseD>( 205 size, machInst, dest, op1, op2); 206 } 207 } 208 209 template <template <typename T> class BaseD, 210 template <typename T> class BaseQ> 211 StaticInstPtr 212 decodeNeonSThreeSReg(bool q, unsigned size, 213 ExtMachInst machInst, IntRegIndex dest, 214 IntRegIndex op1, IntRegIndex op2) 215 { 216 if (q) { 217 return decodeNeonSThreeUSReg<BaseQ>( 218 size, machInst, dest, op1, op2); 219 } else { 220 return decodeNeonSThreeUSReg<BaseD>( 221 size, machInst, dest, op1, op2); 222 } 223 } 224 225 template <template <typename T> class BaseD, 226 template <typename T> class BaseQ> 227 StaticInstPtr 228 decodeNeonSThreeXReg(bool q, unsigned size, 229 ExtMachInst machInst, IntRegIndex dest, 230 IntRegIndex op1, IntRegIndex op2) 231 { 232 if (q) { 233 return decodeNeonSThreeUReg<BaseQ>( 234 size, machInst, dest, op1, op2); 235 } else { 236 return decodeNeonSThreeUSReg<BaseD>( 237 size, machInst, dest, op1, op2); 238 } 239 
} 240 241 template <template <typename T> class BaseD, 242 template <typename T> class BaseQ> 243 StaticInstPtr 244 decodeNeonUThreeXReg(bool q, unsigned size, 245 ExtMachInst machInst, IntRegIndex dest, 246 IntRegIndex op1, IntRegIndex op2) 247 { 248 if (q) { 249 return decodeNeonUThreeUReg<BaseQ>( 250 size, machInst, dest, op1, op2); 251 } else { 252 return decodeNeonUThreeUSReg<BaseD>( 253 size, machInst, dest, op1, op2); 254 } 255 } 256 257 template <template <typename T> class BaseD, 258 template <typename T> class BaseQ> 259 StaticInstPtr 260 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, 261 ExtMachInst machInst, IntRegIndex dest, 262 IntRegIndex op1, IntRegIndex op2) 263 { 264 if (notSigned) { 265 return decodeNeonUThreeSReg<BaseD, BaseQ>( 266 q, size, machInst, dest, op1, op2); 267 } else { 268 return decodeNeonSThreeSReg<BaseD, BaseQ>( 269 q, size, machInst, dest, op1, op2); 270 } 271 } 272 273 template <template <typename T> class BaseD, 274 template <typename T> class BaseQ> 275 StaticInstPtr 276 decodeNeonUThreeReg(bool q, unsigned size, 277 ExtMachInst machInst, IntRegIndex dest, 278 IntRegIndex op1, IntRegIndex op2) 279 { 280 if (q) { 281 return decodeNeonUThreeUReg<BaseQ>( 282 size, machInst, dest, op1, op2); 283 } else { 284 return decodeNeonUThreeUReg<BaseD>( 285 size, machInst, dest, op1, op2); 286 } 287 } 288 289 template <template <typename T> class BaseD, 290 template <typename T> class BaseQ> 291 StaticInstPtr 292 decodeNeonSThreeReg(bool q, unsigned size, 293 ExtMachInst machInst, IntRegIndex dest, 294 IntRegIndex op1, IntRegIndex op2) 295 { 296 if (q) { 297 return decodeNeonSThreeUReg<BaseQ>( 298 size, machInst, dest, op1, op2); 299 } else { 300 return decodeNeonSThreeUReg<BaseD>( 301 size, machInst, dest, op1, op2); 302 } 303 } 304 305 template <template <typename T> class BaseD, 306 template <typename T> class BaseQ> 307 StaticInstPtr 308 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, 309 ExtMachInst machInst, 
IntRegIndex dest, 310 IntRegIndex op1, IntRegIndex op2) 311 { 312 if (notSigned) { 313 return decodeNeonUThreeReg<BaseD, BaseQ>( 314 q, size, machInst, dest, op1, op2); 315 } else { 316 return decodeNeonSThreeReg<BaseD, BaseQ>( 317 q, size, machInst, dest, op1, op2); 318 } 319 } 320 321 template <template <typename T> class BaseD, 322 template <typename T> class BaseQ> 323 StaticInstPtr 324 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, 325 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) 326 { 327 if (q) { 328 if (size) 329 return new BaseQ<uint64_t>(machInst, dest, op1, op2); 330 else 331 return new BaseQ<uint32_t>(machInst, dest, op1, op2); 332 } else { 333 if (size) 334 return new Unknown(machInst); 335 else 336 return new BaseD<uint32_t>(machInst, dest, op1, op2); 337 } 338 } 339 340 template <template <typename T> class Base> 341 StaticInstPtr 342 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, 343 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) 344 { 345 if (size) 346 return new Base<uint64_t>(machInst, dest, op1, op2); 347 else 348 return new Base<uint32_t>(machInst, dest, op1, op2); 349 } 350 351 template <template <typename T> class Base> 352 StaticInstPtr 353 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, 354 IntRegIndex dest, IntRegIndex op1, 355 IntRegIndex op2, uint64_t imm) 356 { 357 if (size) 358 return new Base<uint64_t>(machInst, dest, op1, op2, imm); 359 else 360 return new Base<uint32_t>(machInst, dest, op1, op2, imm); 361 } 362 363 template <template <typename T> class BaseD, 364 template <typename T> class BaseQ> 365 StaticInstPtr 366 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, 367 IntRegIndex dest, IntRegIndex op1, 368 IntRegIndex op2, uint64_t imm) 369 { 370 if (q) { 371 switch (size) { 372 case 1: 373 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); 374 case 2: 375 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); 376 default: 377 return new 
Unknown(machInst); 378 } 379 } else { 380 switch (size) { 381 case 1: 382 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); 383 case 2: 384 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); 385 default: 386 return new Unknown(machInst); 387 } 388 } 389 } 390 391 template <template <typename T> class BaseD, 392 template <typename T> class BaseQ> 393 StaticInstPtr 394 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, 395 IntRegIndex dest, IntRegIndex op1, 396 IntRegIndex op2, uint64_t imm) 397 { 398 if (q) { 399 switch (size) { 400 case 1: 401 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); 402 case 2: 403 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); 404 default: 405 return new Unknown(machInst); 406 } 407 } else { 408 switch (size) { 409 case 1: 410 return new BaseD<int16_t>(machInst, dest, op1, op2, imm); 411 case 2: 412 return new BaseD<int32_t>(machInst, dest, op1, op2, imm); 413 default: 414 return new Unknown(machInst); 415 } 416 } 417 } 418 419 template <template <typename T> class BaseD, 420 template <typename T> class BaseQ> 421 StaticInstPtr 422 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, 423 IntRegIndex dest, IntRegIndex op1, 424 IntRegIndex op2, uint64_t imm) 425 { 426 if (q) { 427 if (size) 428 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); 429 else 430 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); 431 } else { 432 if (size) 433 return new Unknown(machInst); 434 else 435 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); 436 } 437 } 438 439 template <template <typename T> class BaseD, 440 template <typename T> class BaseQ> 441 StaticInstPtr 442 decodeNeonUTwoShiftReg(bool q, unsigned size, 443 ExtMachInst machInst, IntRegIndex dest, 444 IntRegIndex op1, uint64_t imm) 445 { 446 if (q) { 447 switch (size) { 448 case 0: 449 return new BaseQ<uint8_t>(machInst, dest, op1, imm); 450 case 1: 451 return new BaseQ<uint16_t>(machInst, dest, 
op1, imm); 452 case 2: 453 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 454 case 3: 455 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 456 default: 457 return new Unknown(machInst); 458 } 459 } else { 460 switch (size) { 461 case 0: 462 return new BaseD<uint8_t>(machInst, dest, op1, imm); 463 case 1: 464 return new BaseD<uint16_t>(machInst, dest, op1, imm); 465 case 2: 466 return new BaseD<uint32_t>(machInst, dest, op1, imm); 467 case 3: 468 return new BaseD<uint64_t>(machInst, dest, op1, imm); 469 default: 470 return new Unknown(machInst); 471 } 472 } 473 } 474 475 template <template <typename T> class BaseD, 476 template <typename T> class BaseQ> 477 StaticInstPtr 478 decodeNeonSTwoShiftReg(bool q, unsigned size, 479 ExtMachInst machInst, IntRegIndex dest, 480 IntRegIndex op1, uint64_t imm) 481 { 482 if (q) { 483 switch (size) { 484 case 0: 485 return new BaseQ<int8_t>(machInst, dest, op1, imm); 486 case 1: 487 return new BaseQ<int16_t>(machInst, dest, op1, imm); 488 case 2: 489 return new BaseQ<int32_t>(machInst, dest, op1, imm); 490 case 3: 491 return new BaseQ<int64_t>(machInst, dest, op1, imm); 492 default: 493 return new Unknown(machInst); 494 } 495 } else { 496 switch (size) { 497 case 0: 498 return new BaseD<int8_t>(machInst, dest, op1, imm); 499 case 1: 500 return new BaseD<int16_t>(machInst, dest, op1, imm); 501 case 2: 502 return new BaseD<int32_t>(machInst, dest, op1, imm); 503 case 3: 504 return new BaseD<int64_t>(machInst, dest, op1, imm); 505 default: 506 return new Unknown(machInst); 507 } 508 } 509 } 510 511 512 template <template <typename T> class BaseD, 513 template <typename T> class BaseQ> 514 StaticInstPtr 515 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, 516 ExtMachInst machInst, IntRegIndex dest, 517 IntRegIndex op1, uint64_t imm) 518 { 519 if (notSigned) { 520 return decodeNeonUTwoShiftReg<BaseD, BaseQ>( 521 q, size, machInst, dest, op1, imm); 522 } else { 523 return decodeNeonSTwoShiftReg<BaseD, BaseQ>( 
524 q, size, machInst, dest, op1, imm); 525 } 526 } 527 528 template <template <typename T> class Base> 529 StaticInstPtr 530 decodeNeonUTwoShiftUSReg(unsigned size, 531 ExtMachInst machInst, IntRegIndex dest, 532 IntRegIndex op1, uint64_t imm) 533 { 534 switch (size) { 535 case 0: 536 return new Base<uint8_t>(machInst, dest, op1, imm); 537 case 1: 538 return new Base<uint16_t>(machInst, dest, op1, imm); 539 case 2: 540 return new Base<uint32_t>(machInst, dest, op1, imm); 541 default: 542 return new Unknown(machInst); 543 } 544 } 545 546 template <template <typename T> class Base> 547 StaticInstPtr 548 decodeNeonUTwoShiftUReg(unsigned size, 549 ExtMachInst machInst, IntRegIndex dest, 550 IntRegIndex op1, uint64_t imm) 551 { 552 switch (size) { 553 case 0: 554 return new Base<uint8_t>(machInst, dest, op1, imm); 555 case 1: 556 return new Base<uint16_t>(machInst, dest, op1, imm); 557 case 2: 558 return new Base<uint32_t>(machInst, dest, op1, imm); 559 case 3: 560 return new Base<uint64_t>(machInst, dest, op1, imm); 561 default: 562 return new Unknown(machInst); 563 } 564 } 565 566 template <template <typename T> class Base> 567 StaticInstPtr 568 decodeNeonSTwoShiftUReg(unsigned size, 569 ExtMachInst machInst, IntRegIndex dest, 570 IntRegIndex op1, uint64_t imm) 571 { 572 switch (size) { 573 case 0: 574 return new Base<int8_t>(machInst, dest, op1, imm); 575 case 1: 576 return new Base<int16_t>(machInst, dest, op1, imm); 577 case 2: 578 return new Base<int32_t>(machInst, dest, op1, imm); 579 case 3: 580 return new Base<int64_t>(machInst, dest, op1, imm); 581 default: 582 return new Unknown(machInst); 583 } 584 } 585 586 template <template <typename T> class BaseD, 587 template <typename T> class BaseQ> 588 StaticInstPtr 589 decodeNeonUTwoShiftSReg(bool q, unsigned size, 590 ExtMachInst machInst, IntRegIndex dest, 591 IntRegIndex op1, uint64_t imm) 592 { 593 if (q) { 594 return decodeNeonUTwoShiftUSReg<BaseQ>( 595 size, machInst, dest, op1, imm); 596 } else { 597 return 
decodeNeonUTwoShiftUSReg<BaseD>( 598 size, machInst, dest, op1, imm); 599 } 600 } 601 602 template <template <typename T> class Base> 603 StaticInstPtr 604 decodeNeonSTwoShiftUSReg(unsigned size, 605 ExtMachInst machInst, IntRegIndex dest, 606 IntRegIndex op1, uint64_t imm) 607 { 608 switch (size) { 609 case 0: 610 return new Base<int8_t>(machInst, dest, op1, imm); 611 case 1: 612 return new Base<int16_t>(machInst, dest, op1, imm); 613 case 2: 614 return new Base<int32_t>(machInst, dest, op1, imm); 615 default: 616 return new Unknown(machInst); 617 } 618 } 619 620 template <template <typename T> class BaseD, 621 template <typename T> class BaseQ> 622 StaticInstPtr 623 decodeNeonSTwoShiftSReg(bool q, unsigned size, 624 ExtMachInst machInst, IntRegIndex dest, 625 IntRegIndex op1, uint64_t imm) 626 { 627 if (q) { 628 return decodeNeonSTwoShiftUSReg<BaseQ>( 629 size, machInst, dest, op1, imm); 630 } else { 631 return decodeNeonSTwoShiftUSReg<BaseD>( 632 size, machInst, dest, op1, imm); 633 } 634 } 635 636 template <template <typename T> class BaseD, 637 template <typename T> class BaseQ> 638 StaticInstPtr 639 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, 640 ExtMachInst machInst, IntRegIndex dest, 641 IntRegIndex op1, uint64_t imm) 642 { 643 if (notSigned) { 644 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 645 q, size, machInst, dest, op1, imm); 646 } else { 647 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 648 q, size, machInst, dest, op1, imm); 649 } 650 } 651 652 template <template <typename T> class BaseD, 653 template <typename T> class BaseQ> 654 StaticInstPtr 655 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, 656 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 657 { 658 if (q) { 659 return decodeNeonUTwoShiftUReg<BaseQ>( 660 size, machInst, dest, op1, imm); 661 } else { 662 return decodeNeonUTwoShiftUSReg<BaseD>( 663 size, machInst, dest, op1, imm); 664 } 665 } 666 667 template <template <typename T> class BaseD, 668 
template <typename T> class BaseQ> 669 StaticInstPtr 670 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, 671 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 672 { 673 if (q) { 674 return decodeNeonSTwoShiftUReg<BaseQ>( 675 size, machInst, dest, op1, imm); 676 } else { 677 return decodeNeonSTwoShiftUSReg<BaseD>( 678 size, machInst, dest, op1, imm); 679 } 680 } 681 682 template <template <typename T> class Base> 683 StaticInstPtr 684 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, 685 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 686 { 687 if (size) 688 return new Base<uint64_t>(machInst, dest, op1, imm); 689 else 690 return new Base<uint32_t>(machInst, dest, op1, imm); 691 } 692 693 template <template <typename T> class BaseD, 694 template <typename T> class BaseQ> 695 StaticInstPtr 696 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, 697 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 698 { 699 if (q) { 700 if (size) 701 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 702 else 703 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 704 } else { 705 if (size) 706 return new Unknown(machInst); 707 else 708 return new BaseD<uint32_t>(machInst, dest, op1, imm); 709 } 710 } 711 712 template <template <typename T> class Base> 713 StaticInstPtr 714 decodeNeonUTwoMiscUSReg(unsigned size, 715 ExtMachInst machInst, IntRegIndex dest, 716 IntRegIndex op1) 717 { 718 switch (size) { 719 case 0: 720 return new Base<uint8_t>(machInst, dest, op1); 721 case 1: 722 return new Base<uint16_t>(machInst, dest, op1); 723 case 2: 724 return new Base<uint32_t>(machInst, dest, op1); 725 default: 726 return new Unknown(machInst); 727 } 728 } 729 730 template <template <typename T> class Base> 731 StaticInstPtr 732 decodeNeonSTwoMiscUSReg(unsigned size, 733 ExtMachInst machInst, IntRegIndex dest, 734 IntRegIndex op1) 735 { 736 switch (size) { 737 case 0: 738 return new Base<int8_t>(machInst, dest, op1); 739 case 1: 740 
return new Base<int16_t>(machInst, dest, op1); 741 case 2: 742 return new Base<int32_t>(machInst, dest, op1); 743 default: 744 return new Unknown(machInst); 745 } 746 } 747 748 template <template <typename T> class BaseD, 749 template <typename T> class BaseQ> 750 StaticInstPtr 751 decodeNeonUTwoMiscSReg(bool q, unsigned size, 752 ExtMachInst machInst, IntRegIndex dest, 753 IntRegIndex op1) 754 { 755 if (q) { 756 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 757 } else { 758 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 759 } 760 } 761 762 template <template <typename T> class BaseD, 763 template <typename T> class BaseQ> 764 StaticInstPtr 765 decodeNeonSTwoMiscSReg(bool q, unsigned size, 766 ExtMachInst machInst, IntRegIndex dest, 767 IntRegIndex op1) 768 { 769 if (q) { 770 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 771 } else { 772 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 773 } 774 } 775 776 template <template <typename T> class Base> 777 StaticInstPtr 778 decodeNeonUTwoMiscUReg(unsigned size, 779 ExtMachInst machInst, IntRegIndex dest, 780 IntRegIndex op1) 781 { 782 switch (size) { 783 case 0: 784 return new Base<uint8_t>(machInst, dest, op1); 785 case 1: 786 return new Base<uint16_t>(machInst, dest, op1); 787 case 2: 788 return new Base<uint32_t>(machInst, dest, op1); 789 case 3: 790 return new Base<uint64_t>(machInst, dest, op1); 791 default: 792 return new Unknown(machInst); 793 } 794 } 795 796 template <template <typename T> class Base> 797 StaticInstPtr 798 decodeNeonSTwoMiscUReg(unsigned size, 799 ExtMachInst machInst, IntRegIndex dest, 800 IntRegIndex op1) 801 { 802 switch (size) { 803 case 0: 804 return new Base<int8_t>(machInst, dest, op1); 805 case 1: 806 return new Base<int16_t>(machInst, dest, op1); 807 case 2: 808 return new Base<int32_t>(machInst, dest, op1); 809 case 3: 810 return new Base<int64_t>(machInst, dest, op1); 811 default: 812 return new 
Unknown(machInst); 813 } 814 } 815 816 template <template <typename T> class BaseD, 817 template <typename T> class BaseQ> 818 StaticInstPtr 819 decodeNeonSTwoMiscReg(bool q, unsigned size, 820 ExtMachInst machInst, IntRegIndex dest, 821 IntRegIndex op1) 822 { 823 if (q) { 824 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 825 } else { 826 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); 827 } 828 } 829 830 template <template <typename T> class BaseD, 831 template <typename T> class BaseQ> 832 StaticInstPtr 833 decodeNeonUTwoMiscReg(bool q, unsigned size, 834 ExtMachInst machInst, IntRegIndex dest, 835 IntRegIndex op1) 836 { 837 if (q) { 838 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 839 } else { 840 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); 841 } 842 } 843 844 template <template <typename T> class BaseD, 845 template <typename T> class BaseQ> 846 StaticInstPtr 847 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, 848 ExtMachInst machInst, IntRegIndex dest, 849 IntRegIndex op1) 850 { 851 if (notSigned) { 852 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 853 q, size, machInst, dest, op1); 854 } else { 855 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 856 q, size, machInst, dest, op1); 857 } 858 } 859 860 template <template <typename T> class BaseD, 861 template <typename T> class BaseQ> 862 StaticInstPtr 863 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, 864 IntRegIndex dest, IntRegIndex op1) 865 { 866 if (q) { 867 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 868 } else { 869 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 870 } 871 } 872 873 template <template <typename T> class BaseD, 874 template <typename T> class BaseQ> 875 StaticInstPtr 876 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, 877 IntRegIndex dest, IntRegIndex op1) 878 { 879 if (q) { 880 return decodeNeonSTwoMiscUReg<BaseQ>(size, 
machInst, dest, op1); 881 } else { 882 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 883 } 884 } 885 886 template <template <typename T> class BaseD, 887 template <typename T> class BaseQ> 888 StaticInstPtr 889 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, 890 IntRegIndex dest, IntRegIndex op1) 891 { 892 if (q) { 893 if (size) 894 return new BaseQ<uint64_t>(machInst, dest, op1); 895 else 896 return new BaseQ<uint32_t>(machInst, dest, op1); 897 } else { 898 if (size) 899 return new Unknown(machInst); 900 else 901 return new BaseD<uint32_t>(machInst, dest, op1); 902 } 903 } 904 905 template <template <typename T> class BaseD, 906 template <typename T> class BaseQ> 907 StaticInstPtr 908 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, 909 IntRegIndex dest, IntRegIndex op1) 910 { 911 if (size) 912 return new BaseQ<uint64_t>(machInst, dest, op1); 913 else 914 return new BaseD<uint32_t>(machInst, dest, op1); 915 } 916 917 template <template <typename T> class Base> 918 StaticInstPtr 919 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, 920 IntRegIndex dest, IntRegIndex op1) 921 { 922 if (size) 923 return new Base<uint64_t>(machInst, dest, op1); 924 else 925 return new Base<uint32_t>(machInst, dest, op1); 926 } 927 928 template <template <typename T> class BaseD, 929 template <typename T> class BaseQ> 930 StaticInstPtr 931 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 932 IntRegIndex dest, IntRegIndex op1) 933 { 934 if (q) { 935 switch (size) { 936 case 0x0: 937 return new BaseQ<uint8_t>(machInst, dest, op1); 938 case 0x1: 939 return new BaseQ<uint16_t>(machInst, dest, op1); 940 case 0x2: 941 return new BaseQ<uint32_t>(machInst, dest, op1); 942 default: 943 return new Unknown(machInst); 944 } 945 } else { 946 switch (size) { 947 case 0x0: 948 return new BaseD<uint8_t>(machInst, dest, op1); 949 case 0x1: 950 return new BaseD<uint16_t>(machInst, dest, op1); 951 default: 952 
return new Unknown(machInst); 953 } 954 } 955 } 956 957 template <template <typename T> class BaseD, 958 template <typename T> class BaseQ, 959 template <typename T> class BaseBQ> 960 StaticInstPtr 961 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 962 IntRegIndex dest, IntRegIndex op1) 963 { 964 if (q) { 965 switch (size) { 966 case 0x0: 967 return new BaseQ<uint8_t>(machInst, dest, op1); 968 case 0x1: 969 return new BaseQ<uint16_t>(machInst, dest, op1); 970 case 0x2: 971 return new BaseBQ<uint32_t>(machInst, dest, op1); 972 default: 973 return new Unknown(machInst); 974 } 975 } else { 976 switch (size) { 977 case 0x0: 978 return new BaseD<uint8_t>(machInst, dest, op1); 979 case 0x1: 980 return new BaseD<uint16_t>(machInst, dest, op1); 981 default: 982 return new Unknown(machInst); 983 } 984 } 985 } 986 987 template <template <typename T> class BaseD, 988 template <typename T> class BaseQ> 989 StaticInstPtr 990 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 991 IntRegIndex dest, IntRegIndex op1) 992 { 993 if (q) { 994 switch (size) { 995 case 0x0: 996 return new BaseQ<int8_t>(machInst, dest, op1); 997 case 0x1: 998 return new BaseQ<int16_t>(machInst, dest, op1); 999 case 0x2: 1000 return new BaseQ<int32_t>(machInst, dest, op1); 1001 default: 1002 return new Unknown(machInst); 1003 } 1004 } else { 1005 switch (size) { 1006 case 0x0: 1007 return new BaseD<int8_t>(machInst, dest, op1); 1008 case 0x1: 1009 return new BaseD<int16_t>(machInst, dest, op1); 1010 default: 1011 return new Unknown(machInst); 1012 } 1013 } 1014 } 1015 1016 template <template <typename T> class BaseD, 1017 template <typename T> class BaseQ, 1018 template <typename T> class BaseBQ> 1019 StaticInstPtr 1020 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, 1021 IntRegIndex dest, IntRegIndex op1) 1022 { 1023 if (q) { 1024 switch (size) { 1025 case 0x0: 1026 return new BaseQ<uint8_t>(machInst, dest, op1); 1027 case 0x1: 1028 
return new BaseQ<uint16_t>(machInst, dest, op1);
              case 0x2:
                return new BaseBQ<uint32_t>(machInst, dest, op1);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0x0:
                return new BaseD<uint8_t>(machInst, dest, op1);
              case 0x1:
                return new BaseD<uint16_t>(machInst, dest, op1);
              default:
                return new Unknown(machInst);
            }
        }
    }

    // Signed-element decoder for across-lanes "long" instructions.
    // With q set, sizes 0 and 1 instantiate BaseQ and size 2 instantiates
    // BaseBQ; with q clear only sizes 0 and 1 are accepted and instantiate
    // BaseD. Any other q/size combination decodes to Unknown.
    template <template <typename T> class BaseD,
              template <typename T> class BaseQ,
              template <typename T> class BaseBQ>
    StaticInstPtr
    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
                                  IntRegIndex dest, IntRegIndex op1)
    {
        if (q) {
            switch (size) {
              case 0x0:
                return new BaseQ<int8_t>(machInst, dest, op1);
              case 0x1:
                return new BaseQ<int16_t>(machInst, dest, op1);
              case 0x2:
                return new BaseBQ<int32_t>(machInst, dest, op1);
              default:
                return new Unknown(machInst);
            }
        } else {
            switch (size) {
              case 0x0:
                return new BaseD<int8_t>(machInst, dest, op1);
              case 0x1:
                return new BaseD<int16_t>(machInst, dest, op1);
              default:
                return new Unknown(machInst);
            }
        }
    }
}};

let {{
    header_output = ""
    exec_output = ""

    # C++ comparison helpers emitted verbatim into the generated code.
    # Each returns 0.0 when the comparison holds, 1.0 when it does not and
    # 2.0 when an operand is a NaN (vceqFunc only tests isSnan).
    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    # Less-than / less-or-equal variants. Not appended to exec_output in
    # this part of the file; presumably consumed further down.
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    # Absolute-value comparisons (operands go through fabsf first).
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    exec_output += vcompares + vacomparesG

    # Element type lists handed to the generators below; every entry gets
    # its own explicit template instantiation.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes

    # Generate a three-operand Neon instruction whose two sources and
    # destination all span rCount registers.
    #   op            C++ snippet that computes destElem from srcElem1 and
    #                 srcElem2 for one element.
    #   readDest      also load the previous destination so that op can
    #                 read destElem.
    #   pairwise      feed op with adjacent element pairs taken from the
    #                 first source and then the second, instead of lane i
    #                 of each source.
    #   standardFpcsr declare fpscr from fpStandardFPSCRValue(FpscrExc)
    #                 before the walk and store it to FpscrExc afterwards.
    # The class declaration goes to header_output; the execute method and
    # one NeonExecDeclare per entry of types go to exec_output.
    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                          readDest=False, pairwise=False,
                          standardFpcsr=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if standardFpcsr:
            eWalkCode += '''
            FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
            '''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        if pairwise:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(srcReg1.elements[i]);
                Element srcElem2 = gtoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        if standardFpcsr:
            eWalkCode += '''
            FpscrExc = fpscr;
            '''
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # Floating-point flavour of threeEqualRegInst: the generated loop walks
    # whole registers (index r) as floats rather than packed elements.
    #   op        C++ snippet that computes destReg from srcReg1/srcReg2.
    #   readDest  preload destReg from the current destination register.
    #   pairwise  take adjacent register pairs from the first source and
    #             then the second.
    #   toInt     the result is a uint32_t held in a RegVect instead of a
    #             float held in a FloatVect.
    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            if toInt:
                # destRegs is a RegVect in this mode, so it has to be read
                # through .regs, mirroring writeDest below. Indexing the
                # RegVect directly would not match how it is used anywhere
                # else in this file.
                readDestCode = 'destReg = destRegs.regs[r];'
            else:
                readDestCode = 'destReg = destRegs[r];'
        destType = 'float'
        writeDest = 'destRegs[r] = destReg;'
        if toInt:
            destType = 'uint32_t'
            writeDest = 'destRegs.regs[r] = destReg;'
        if pairwise:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = (2 * r < rCount) ?
                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
                float srcReg2 = (2 * r < rCount) ?
                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        else:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = srcRegs1[r];
                float srcReg2 = srcRegs2[r];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    def threeUnequalRegInst(name, Name, opClass, types,
op, 1306 bigSrc1, bigSrc2, bigDest, readDest): 1307 global header_output, exec_output 1308 src1Cnt = src2Cnt = destCnt = 2 1309 src1Prefix = src2Prefix = destPrefix = '' 1310 if bigSrc1: 1311 src1Cnt = 4 1312 src1Prefix = 'Big' 1313 if bigSrc2: 1314 src2Cnt = 4 1315 src2Prefix = 'Big' 1316 if bigDest: 1317 destCnt = 4 1318 destPrefix = 'Big' 1319 eWalkCode = simdEnabledCheckCode + ''' 1320 %sRegVect srcReg1; 1321 %sRegVect srcReg2; 1322 %sRegVect destReg; 1323 ''' % (src1Prefix, src2Prefix, destPrefix) 1324 for reg in range(src1Cnt): 1325 eWalkCode += ''' 1326 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1327 ''' % { "reg" : reg } 1328 for reg in range(src2Cnt): 1329 eWalkCode += ''' 1330 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 1331 ''' % { "reg" : reg } 1332 if readDest: 1333 for reg in range(destCnt): 1334 eWalkCode += ''' 1335 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1336 ''' % { "reg" : reg } 1337 readDestCode = '' 1338 if readDest: 1339 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1340 eWalkCode += ''' 1341 for (unsigned i = 0; i < eCount; i++) { 1342 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 1343 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]); 1344 %(destPrefix)sElement destElem; 1345 %(readDest)s 1346 %(op)s 1347 destReg.elements[i] = htog(destElem); 1348 } 1349 ''' % { "op" : op, "readDest" : readDestCode, 1350 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 1351 "destPrefix" : destPrefix } 1352 for reg in range(destCnt): 1353 eWalkCode += ''' 1354 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1355 ''' % { "reg" : reg } 1356 iop = InstObjParams(name, Name, 1357 "RegRegRegOp", 1358 { "code": eWalkCode, 1359 "r_count": 2, 1360 "predicate_test": predicateTest, 1361 "op_class": opClass }, []) 1362 header_output += NeonRegRegRegOpDeclare.subst(iop) 1363 exec_output += NeonUnequalRegExecute.subst(iop) 1364 for type in types: 1365 substDict = { "targs" : type, 1366 "class_name" : Name } 1367 
exec_output += NeonExecDeclare.subst(substDict) 1368 1369 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False): 1370 threeUnequalRegInst(name, Name, opClass, types, op, 1371 True, True, False, readDest) 1372 1373 def threeRegLongInst(name, Name, opClass, types, op, readDest=False): 1374 threeUnequalRegInst(name, Name, opClass, types, op, 1375 False, False, True, readDest) 1376 1377 def threeRegWideInst(name, Name, opClass, types, op, readDest=False): 1378 threeUnequalRegInst(name, Name, opClass, types, op, 1379 True, False, True, readDest) 1380 1381 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False): 1382 global header_output, exec_output 1383 eWalkCode = simdEnabledCheckCode + ''' 1384 RegVect srcReg1, srcReg2, destReg; 1385 ''' 1386 for reg in range(rCount): 1387 eWalkCode += ''' 1388 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1389 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 1390 ''' % { "reg" : reg } 1391 if readDest: 1392 eWalkCode += ''' 1393 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1394 ''' % { "reg" : reg } 1395 readDestCode = '' 1396 if readDest: 1397 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1398 eWalkCode += ''' 1399 if (imm < 0 && imm >= eCount) { 1400 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1401 mnemonic); 1402 } else { 1403 for (unsigned i = 0; i < eCount; i++) { 1404 Element srcElem1 = gtoh(srcReg1.elements[i]); 1405 Element srcElem2 = gtoh(srcReg2.elements[imm]); 1406 Element destElem; 1407 %(readDest)s 1408 %(op)s 1409 destReg.elements[i] = htog(destElem); 1410 } 1411 } 1412 ''' % { "op" : op, "readDest" : readDestCode } 1413 for reg in range(rCount): 1414 eWalkCode += ''' 1415 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1416 ''' % { "reg" : reg } 1417 iop = InstObjParams(name, Name, 1418 "RegRegRegImmOp", 1419 { "code": eWalkCode, 1420 "r_count": rCount, 1421 "predicate_test": predicateTest, 1422 "op_class": opClass }, []) 1423 header_output += 
NeonRegRegRegImmOpDeclare.subst(iop) 1424 exec_output += NeonEqualRegExecute.subst(iop) 1425 for type in types: 1426 substDict = { "targs" : type, 1427 "class_name" : Name } 1428 exec_output += NeonExecDeclare.subst(substDict) 1429 1430 def twoRegLongInst(name, Name, opClass, types, op, readDest=False): 1431 global header_output, exec_output 1432 rCount = 2 1433 eWalkCode = simdEnabledCheckCode + ''' 1434 RegVect srcReg1, srcReg2; 1435 BigRegVect destReg; 1436 ''' 1437 for reg in range(rCount): 1438 eWalkCode += ''' 1439 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1440 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);; 1441 ''' % { "reg" : reg } 1442 if readDest: 1443 for reg in range(2 * rCount): 1444 eWalkCode += ''' 1445 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1446 ''' % { "reg" : reg } 1447 readDestCode = '' 1448 if readDest: 1449 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1450 eWalkCode += ''' 1451 if (imm < 0 && imm >= eCount) { 1452 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1453 mnemonic); 1454 } else { 1455 for (unsigned i = 0; i < eCount; i++) { 1456 Element srcElem1 = gtoh(srcReg1.elements[i]); 1457 Element srcElem2 = gtoh(srcReg2.elements[imm]); 1458 BigElement destElem; 1459 %(readDest)s 1460 %(op)s 1461 destReg.elements[i] = htog(destElem); 1462 } 1463 } 1464 ''' % { "op" : op, "readDest" : readDestCode } 1465 for reg in range(2 * rCount): 1466 eWalkCode += ''' 1467 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1468 ''' % { "reg" : reg } 1469 iop = InstObjParams(name, Name, 1470 "RegRegRegImmOp", 1471 { "code": eWalkCode, 1472 "r_count": rCount, 1473 "predicate_test": predicateTest, 1474 "op_class": opClass }, []) 1475 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1476 exec_output += NeonUnequalRegExecute.subst(iop) 1477 for type in types: 1478 substDict = { "targs" : type, 1479 "class_name" : Name } 1480 exec_output += NeonExecDeclare.subst(substDict) 1481 1482 def twoEqualRegInstFp(name, Name, 
opClass, types, rCount, op, readDest=False): 1483 global header_output, exec_output 1484 eWalkCode = simdEnabledCheckCode + ''' 1485 typedef float FloatVect[rCount]; 1486 FloatVect srcRegs1, srcRegs2, destRegs; 1487 ''' 1488 for reg in range(rCount): 1489 eWalkCode += ''' 1490 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1491 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 1492 ''' % { "reg" : reg } 1493 if readDest: 1494 eWalkCode += ''' 1495 destRegs[%(reg)d] = FpDestP%(reg)d; 1496 ''' % { "reg" : reg } 1497 readDestCode = '' 1498 if readDest: 1499 readDestCode = 'destReg = destRegs[i];' 1500 eWalkCode += ''' 1501 if (imm < 0 && imm >= eCount) { 1502 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1503 mnemonic); 1504 } else { 1505 for (unsigned i = 0; i < rCount; i++) { 1506 float srcReg1 = srcRegs1[i]; 1507 float srcReg2 = srcRegs2[imm]; 1508 float destReg; 1509 %(readDest)s 1510 %(op)s 1511 destRegs[i] = destReg; 1512 } 1513 } 1514 ''' % { "op" : op, "readDest" : readDestCode } 1515 for reg in range(rCount): 1516 eWalkCode += ''' 1517 FpDestP%(reg)d = destRegs[%(reg)d]; 1518 ''' % { "reg" : reg } 1519 iop = InstObjParams(name, Name, 1520 "FpRegRegRegImmOp", 1521 { "code": eWalkCode, 1522 "r_count": rCount, 1523 "predicate_test": predicateTest, 1524 "op_class": opClass }, []) 1525 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1526 exec_output += NeonEqualRegExecute.subst(iop) 1527 for type in types: 1528 substDict = { "targs" : type, 1529 "class_name" : Name } 1530 exec_output += NeonExecDeclare.subst(substDict) 1531 1532 def twoRegShiftInst(name, Name, opClass, types, rCount, op, 1533 readDest=False, toInt=False, fromInt=False): 1534 global header_output, exec_output 1535 eWalkCode = simdEnabledCheckCode + ''' 1536 RegVect srcRegs1, destRegs; 1537 ''' 1538 for reg in range(rCount): 1539 eWalkCode += ''' 1540 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1541 ''' % { "reg" : reg } 1542 if readDest: 1543 eWalkCode += ''' 1544 destRegs.regs[%(reg)d] = 
htog(FpDestP%(reg)d_uw); 1545 ''' % { "reg" : reg } 1546 readDestCode = '' 1547 if readDest: 1548 readDestCode = 'destElem = gtoh(destRegs.elements[i]);' 1549 if toInt: 1550 readDestCode = 'destReg = gtoh(destRegs.regs[i]);' 1551 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);' 1552 if fromInt: 1553 readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);' 1554 declDest = 'Element destElem;' 1555 writeDestCode = 'destRegs.elements[i] = htog(destElem);' 1556 if toInt: 1557 declDest = 'uint32_t destReg;' 1558 writeDestCode = 'destRegs.regs[i] = htog(destReg);' 1559 eWalkCode += ''' 1560 for (unsigned i = 0; i < eCount; i++) { 1561 %(readOp)s 1562 %(declDest)s 1563 %(readDest)s 1564 %(op)s 1565 %(writeDest)s 1566 } 1567 ''' % { "readOp" : readOpCode, 1568 "declDest" : declDest, 1569 "readDest" : readDestCode, 1570 "op" : op, 1571 "writeDest" : writeDestCode } 1572 for reg in range(rCount): 1573 eWalkCode += ''' 1574 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]); 1575 ''' % { "reg" : reg } 1576 iop = InstObjParams(name, Name, 1577 "RegRegImmOp", 1578 { "code": eWalkCode, 1579 "r_count": rCount, 1580 "predicate_test": predicateTest, 1581 "op_class": opClass }, []) 1582 header_output += NeonRegRegImmOpDeclare.subst(iop) 1583 exec_output += NeonEqualRegExecute.subst(iop) 1584 for type in types: 1585 substDict = { "targs" : type, 1586 "class_name" : Name } 1587 exec_output += NeonExecDeclare.subst(substDict) 1588 1589 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False): 1590 global header_output, exec_output 1591 eWalkCode = simdEnabledCheckCode + ''' 1592 BigRegVect srcReg1; 1593 RegVect destReg; 1594 ''' 1595 for reg in range(4): 1596 eWalkCode += ''' 1597 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1598 ''' % { "reg" : reg } 1599 if readDest: 1600 for reg in range(2): 1601 eWalkCode += ''' 1602 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1603 ''' % { "reg" : reg } 1604 readDestCode = '' 1605 if readDest: 1606 readDestCode = 
'destElem = gtoh(destReg.elements[i]);' 1607 eWalkCode += ''' 1608 for (unsigned i = 0; i < eCount; i++) { 1609 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1610 Element destElem; 1611 %(readDest)s 1612 %(op)s 1613 destReg.elements[i] = htog(destElem); 1614 } 1615 ''' % { "op" : op, "readDest" : readDestCode } 1616 for reg in range(2): 1617 eWalkCode += ''' 1618 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1619 ''' % { "reg" : reg } 1620 iop = InstObjParams(name, Name, 1621 "RegRegImmOp", 1622 { "code": eWalkCode, 1623 "r_count": 2, 1624 "predicate_test": predicateTest, 1625 "op_class": opClass }, []) 1626 header_output += NeonRegRegImmOpDeclare.subst(iop) 1627 exec_output += NeonUnequalRegExecute.subst(iop) 1628 for type in types: 1629 substDict = { "targs" : type, 1630 "class_name" : Name } 1631 exec_output += NeonExecDeclare.subst(substDict) 1632 1633 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False): 1634 global header_output, exec_output 1635 eWalkCode = simdEnabledCheckCode + ''' 1636 RegVect srcReg1; 1637 BigRegVect destReg; 1638 ''' 1639 for reg in range(2): 1640 eWalkCode += ''' 1641 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1642 ''' % { "reg" : reg } 1643 if readDest: 1644 for reg in range(4): 1645 eWalkCode += ''' 1646 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1647 ''' % { "reg" : reg } 1648 readDestCode = '' 1649 if readDest: 1650 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1651 eWalkCode += ''' 1652 for (unsigned i = 0; i < eCount; i++) { 1653 Element srcElem1 = gtoh(srcReg1.elements[i]); 1654 BigElement destElem; 1655 %(readDest)s 1656 %(op)s 1657 destReg.elements[i] = htog(destElem); 1658 } 1659 ''' % { "op" : op, "readDest" : readDestCode } 1660 for reg in range(4): 1661 eWalkCode += ''' 1662 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1663 ''' % { "reg" : reg } 1664 iop = InstObjParams(name, Name, 1665 "RegRegImmOp", 1666 { "code": eWalkCode, 1667 "r_count": 2, 1668 "predicate_test": 
predicateTest, 1669 "op_class": opClass }, []) 1670 header_output += NeonRegRegImmOpDeclare.subst(iop) 1671 exec_output += NeonUnequalRegExecute.subst(iop) 1672 for type in types: 1673 substDict = { "targs" : type, 1674 "class_name" : Name } 1675 exec_output += NeonExecDeclare.subst(substDict) 1676 1677 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False): 1678 global header_output, exec_output 1679 eWalkCode = simdEnabledCheckCode + ''' 1680 RegVect srcReg1, destReg; 1681 ''' 1682 for reg in range(rCount): 1683 eWalkCode += ''' 1684 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1685 ''' % { "reg" : reg } 1686 if readDest: 1687 eWalkCode += ''' 1688 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1689 ''' % { "reg" : reg } 1690 readDestCode = '' 1691 if readDest: 1692 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1693 eWalkCode += ''' 1694 for (unsigned i = 0; i < eCount; i++) { 1695 unsigned j = i; 1696 Element srcElem1 = gtoh(srcReg1.elements[i]); 1697 Element destElem; 1698 %(readDest)s 1699 %(op)s 1700 destReg.elements[j] = htog(destElem); 1701 } 1702 ''' % { "op" : op, "readDest" : readDestCode } 1703 for reg in range(rCount): 1704 eWalkCode += ''' 1705 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1706 ''' % { "reg" : reg } 1707 iop = InstObjParams(name, Name, 1708 "RegRegOp", 1709 { "code": eWalkCode, 1710 "r_count": rCount, 1711 "predicate_test": predicateTest, 1712 "op_class": opClass }, []) 1713 header_output += NeonRegRegOpDeclare.subst(iop) 1714 exec_output += NeonEqualRegExecute.subst(iop) 1715 for type in types: 1716 substDict = { "targs" : type, 1717 "class_name" : Name } 1718 exec_output += NeonExecDeclare.subst(substDict) 1719 1720 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False): 1721 global header_output, exec_output 1722 eWalkCode = simdEnabledCheckCode + ''' 1723 RegVect srcReg1, destReg; 1724 ''' 1725 for reg in range(rCount): 1726 eWalkCode += ''' 1727 srcReg1.regs[%(reg)d] = 
htog(FpOp1P%(reg)d_uw); 1728 ''' % { "reg" : reg } 1729 if readDest: 1730 eWalkCode += ''' 1731 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1732 ''' % { "reg" : reg } 1733 readDestCode = '' 1734 if readDest: 1735 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1736 eWalkCode += ''' 1737 for (unsigned i = 0; i < eCount; i++) { 1738 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1739 Element destElem; 1740 %(readDest)s 1741 %(op)s 1742 destReg.elements[i] = htog(destElem); 1743 } 1744 ''' % { "op" : op, "readDest" : readDestCode } 1745 for reg in range(rCount): 1746 eWalkCode += ''' 1747 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1748 ''' % { "reg" : reg } 1749 iop = InstObjParams(name, Name, 1750 "RegRegImmOp", 1751 { "code": eWalkCode, 1752 "r_count": rCount, 1753 "predicate_test": predicateTest, 1754 "op_class": opClass }, []) 1755 header_output += NeonRegRegImmOpDeclare.subst(iop) 1756 exec_output += NeonEqualRegExecute.subst(iop) 1757 for type in types: 1758 substDict = { "targs" : type, 1759 "class_name" : Name } 1760 exec_output += NeonExecDeclare.subst(substDict) 1761 1762 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False): 1763 global header_output, exec_output 1764 eWalkCode = simdEnabledCheckCode + ''' 1765 RegVect srcReg1, destReg; 1766 ''' 1767 for reg in range(rCount): 1768 eWalkCode += ''' 1769 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1770 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1771 ''' % { "reg" : reg } 1772 if readDest: 1773 eWalkCode += ''' 1774 ''' % { "reg" : reg } 1775 readDestCode = '' 1776 if readDest: 1777 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1778 eWalkCode += op 1779 for reg in range(rCount): 1780 eWalkCode += ''' 1781 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1782 FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]); 1783 ''' % { "reg" : reg } 1784 iop = InstObjParams(name, Name, 1785 "RegRegOp", 1786 { "code": eWalkCode, 1787 "r_count": rCount, 1788 
"predicate_test": predicateTest, 1789 "op_class": opClass }, []) 1790 header_output += NeonRegRegOpDeclare.subst(iop) 1791 exec_output += NeonEqualRegExecute.subst(iop) 1792 for type in types: 1793 substDict = { "targs" : type, 1794 "class_name" : Name } 1795 exec_output += NeonExecDeclare.subst(substDict) 1796 1797 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op, 1798 readDest=False, toInt=False): 1799 global header_output, exec_output 1800 eWalkCode = simdEnabledCheckCode + ''' 1801 typedef float FloatVect[rCount]; 1802 FloatVect srcRegs1; 1803 ''' 1804 if toInt: 1805 eWalkCode += 'RegVect destRegs;\n' 1806 else: 1807 eWalkCode += 'FloatVect destRegs;\n' 1808 for reg in range(rCount): 1809 eWalkCode += ''' 1810 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1811 ''' % { "reg" : reg } 1812 if readDest: 1813 if toInt: 1814 eWalkCode += ''' 1815 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1816 ''' % { "reg" : reg } 1817 else: 1818 eWalkCode += ''' 1819 destRegs[%(reg)d] = FpDestP%(reg)d; 1820 ''' % { "reg" : reg } 1821 readDestCode = '' 1822 if readDest: 1823 readDestCode = 'destReg = destRegs[i];' 1824 destType = 'float' 1825 writeDest = 'destRegs[r] = destReg;' 1826 if toInt: 1827 destType = 'uint32_t' 1828 writeDest = 'destRegs.regs[r] = destReg;' 1829 eWalkCode += ''' 1830 for (unsigned r = 0; r < rCount; r++) { 1831 float srcReg1 = srcRegs1[r]; 1832 %(destType)s destReg; 1833 %(readDest)s 1834 %(op)s 1835 %(writeDest)s 1836 } 1837 ''' % { "op" : op, 1838 "readDest" : readDestCode, 1839 "destType" : destType, 1840 "writeDest" : writeDest } 1841 for reg in range(rCount): 1842 if toInt: 1843 eWalkCode += ''' 1844 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 1845 ''' % { "reg" : reg } 1846 else: 1847 eWalkCode += ''' 1848 FpDestP%(reg)d = destRegs[%(reg)d]; 1849 ''' % { "reg" : reg } 1850 iop = InstObjParams(name, Name, 1851 "FpRegRegOp", 1852 { "code": eWalkCode, 1853 "r_count": rCount, 1854 "predicate_test": predicateTest, 1855 "op_class": opClass }, []) 1856 
header_output += NeonRegRegOpDeclare.subst(iop) 1857 exec_output += NeonEqualRegExecute.subst(iop) 1858 for type in types: 1859 substDict = { "targs" : type, 1860 "class_name" : Name } 1861 exec_output += NeonExecDeclare.subst(substDict) 1862 1863 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False): 1864 global header_output, exec_output 1865 eWalkCode = simdEnabledCheckCode + ''' 1866 RegVect srcRegs; 1867 BigRegVect destReg; 1868 ''' 1869 for reg in range(rCount): 1870 eWalkCode += ''' 1871 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1872 ''' % { "reg" : reg } 1873 if readDest: 1874 eWalkCode += ''' 1875 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1876 ''' % { "reg" : reg } 1877 readDestCode = '' 1878 if readDest: 1879 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1880 eWalkCode += ''' 1881 for (unsigned i = 0; i < eCount / 2; i++) { 1882 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1883 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1884 BigElement destElem; 1885 %(readDest)s 1886 %(op)s 1887 destReg.elements[i] = htog(destElem); 1888 } 1889 ''' % { "op" : op, "readDest" : readDestCode } 1890 for reg in range(rCount): 1891 eWalkCode += ''' 1892 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1893 ''' % { "reg" : reg } 1894 iop = InstObjParams(name, Name, 1895 "RegRegOp", 1896 { "code": eWalkCode, 1897 "r_count": rCount, 1898 "predicate_test": predicateTest, 1899 "op_class": opClass }, []) 1900 header_output += NeonRegRegOpDeclare.subst(iop) 1901 exec_output += NeonUnequalRegExecute.subst(iop) 1902 for type in types: 1903 substDict = { "targs" : type, 1904 "class_name" : Name } 1905 exec_output += NeonExecDeclare.subst(substDict) 1906 1907 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False): 1908 global header_output, exec_output 1909 eWalkCode = simdEnabledCheckCode + ''' 1910 BigRegVect srcReg1; 1911 RegVect destReg; 1912 ''' 1913 for reg in range(4): 1914 eWalkCode += ''' 1915 
srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1916 ''' % { "reg" : reg } 1917 if readDest: 1918 for reg in range(2): 1919 eWalkCode += ''' 1920 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1921 ''' % { "reg" : reg } 1922 readDestCode = '' 1923 if readDest: 1924 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1925 eWalkCode += ''' 1926 for (unsigned i = 0; i < eCount; i++) { 1927 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1928 Element destElem; 1929 %(readDest)s 1930 %(op)s 1931 destReg.elements[i] = htog(destElem); 1932 } 1933 ''' % { "op" : op, "readDest" : readDestCode } 1934 for reg in range(2): 1935 eWalkCode += ''' 1936 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1937 ''' % { "reg" : reg } 1938 iop = InstObjParams(name, Name, 1939 "RegRegOp", 1940 { "code": eWalkCode, 1941 "r_count": 2, 1942 "predicate_test": predicateTest, 1943 "op_class": opClass }, []) 1944 header_output += NeonRegRegOpDeclare.subst(iop) 1945 exec_output += NeonUnequalRegExecute.subst(iop) 1946 for type in types: 1947 substDict = { "targs" : type, 1948 "class_name" : Name } 1949 exec_output += NeonExecDeclare.subst(substDict) 1950 1951 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False): 1952 global header_output, exec_output 1953 eWalkCode = simdEnabledCheckCode + ''' 1954 RegVect destReg; 1955 ''' 1956 if readDest: 1957 for reg in range(rCount): 1958 eWalkCode += ''' 1959 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1960 ''' % { "reg" : reg } 1961 readDestCode = '' 1962 if readDest: 1963 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1964 eWalkCode += ''' 1965 for (unsigned i = 0; i < eCount; i++) { 1966 Element destElem; 1967 %(readDest)s 1968 %(op)s 1969 destReg.elements[i] = htog(destElem); 1970 } 1971 ''' % { "op" : op, "readDest" : readDestCode } 1972 for reg in range(rCount): 1973 eWalkCode += ''' 1974 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1975 ''' % { "reg" : reg } 1976 iop = InstObjParams(name, Name, 1977 
"RegImmOp", 1978 { "code": eWalkCode, 1979 "r_count": rCount, 1980 "predicate_test": predicateTest, 1981 "op_class": opClass }, []) 1982 header_output += NeonRegImmOpDeclare.subst(iop) 1983 exec_output += NeonEqualRegExecute.subst(iop) 1984 for type in types: 1985 substDict = { "targs" : type, 1986 "class_name" : Name } 1987 exec_output += NeonExecDeclare.subst(substDict) 1988 1989 def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False): 1990 global header_output, exec_output 1991 eWalkCode = simdEnabledCheckCode + ''' 1992 RegVect srcReg1; 1993 BigRegVect destReg; 1994 ''' 1995 for reg in range(2): 1996 eWalkCode += ''' 1997 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1998 ''' % { "reg" : reg } 1999 if readDest: 2000 for reg in range(4): 2001 eWalkCode += ''' 2002 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 2003 ''' % { "reg" : reg } 2004 readDestCode = '' 2005 if readDest: 2006 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 2007 eWalkCode += ''' 2008 for (unsigned i = 0; i < eCount; i++) { 2009 Element srcElem1 = gtoh(srcReg1.elements[i]); 2010 BigElement destElem; 2011 %(readDest)s 2012 %(op)s 2013 destReg.elements[i] = htog(destElem); 2014 } 2015 ''' % { "op" : op, "readDest" : readDestCode } 2016 for reg in range(4): 2017 eWalkCode += ''' 2018 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 2019 ''' % { "reg" : reg } 2020 iop = InstObjParams(name, Name, 2021 "RegRegOp", 2022 { "code": eWalkCode, 2023 "r_count": 2, 2024 "predicate_test": predicateTest, 2025 "op_class": opClass }, []) 2026 header_output += NeonRegRegOpDeclare.subst(iop) 2027 exec_output += NeonUnequalRegExecute.subst(iop) 2028 for type in types: 2029 substDict = { "targs" : type, 2030 "class_name" : Name } 2031 exec_output += NeonExecDeclare.subst(substDict) 2032 2033 vhaddCode = ''' 2034 Element carryBit = 2035 (((unsigned)srcElem1 & 0x1) + 2036 ((unsigned)srcElem2 & 0x1)) >> 1; 2037 // Use division instead of a shift to ensure the sign extension works 2038 // 
right. The compiler will figure out if it can be a shift. Mask the 2039 // inputs so they get truncated correctly. 2040 destElem = (((srcElem1 & ~(Element)1) / 2) + 2041 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2042 ''' 2043 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode) 2044 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode) 2045 2046 vrhaddCode = ''' 2047 Element carryBit = 2048 (((unsigned)srcElem1 & 0x1) + 2049 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 2050 // Use division instead of a shift to ensure the sign extension works 2051 // right. The compiler will figure out if it can be a shift. Mask the 2052 // inputs so they get truncated correctly. 2053 destElem = (((srcElem1 & ~(Element)1) / 2) + 2054 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2055 ''' 2056 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode) 2057 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode) 2058 2059 vhsubCode = ''' 2060 Element barrowBit = 2061 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 2062 // Use division instead of a shift to ensure the sign extension works 2063 // right. The compiler will figure out if it can be a shift. Mask the 2064 // inputs so they get truncated correctly. 
2065 destElem = (((srcElem1 & ~(Element)1) / 2) - 2066 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 2067 ''' 2068 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode) 2069 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode) 2070 2071 vandCode = ''' 2072 destElem = srcElem1 & srcElem2; 2073 ''' 2074 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode) 2075 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode) 2076 2077 vbicCode = ''' 2078 destElem = srcElem1 & ~srcElem2; 2079 ''' 2080 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode) 2081 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode) 2082 2083 vorrCode = ''' 2084 destElem = srcElem1 | srcElem2; 2085 ''' 2086 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode) 2087 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode) 2088 2089 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode) 2090 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode) 2091 2092 vornCode = ''' 2093 destElem = srcElem1 | ~srcElem2; 2094 ''' 2095 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode) 2096 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode) 2097 2098 veorCode = ''' 2099 destElem = srcElem1 ^ srcElem2; 2100 ''' 2101 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode) 2102 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode) 2103 2104 vbifCode = ''' 2105 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 2106 ''' 2107 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True) 2108 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True) 2109 vbitCode = ''' 2110 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 2111 ''' 2112 threeEqualRegInst("vbit", 
"VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True) 2113 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True) 2114 vbslCode = ''' 2115 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 2116 ''' 2117 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True) 2118 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True) 2119 2120 vmaxCode = ''' 2121 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2; 2122 ''' 2123 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode) 2124 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode) 2125 2126 vminCode = ''' 2127 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2; 2128 ''' 2129 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode) 2130 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) 2131 2132 vaddCode = ''' 2133 destElem = srcElem1 + srcElem2; 2134 ''' 2135 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) 2136 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) 2137 2138 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes, 2139 2, vaddCode, pairwise=True) 2140 vaddlwCode = ''' 2141 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2142 ''' 2143 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) 2144 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) 2145 vaddhnCode = ''' 2146 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 2147 (sizeof(Element) * 8); 2148 ''' 2149 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode) 2150 vraddhnCode = ''' 2151 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + 2152 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 2153 (sizeof(Element) * 8); 2154 ''' 2155 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode) 2156 2157 vsubCode = ''' 2158 
destElem = srcElem1 - srcElem2; 2159 ''' 2160 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode) 2161 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode) 2162 vsublwCode = ''' 2163 destElem = (BigElement)srcElem1 - (BigElement)srcElem2; 2164 ''' 2165 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode) 2166 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode) 2167 2168 vqaddUCode = ''' 2169 destElem = srcElem1 + srcElem2; 2170 FPSCR fpscr = (FPSCR) FpscrQc; 2171 if (destElem < srcElem1 || destElem < srcElem2) { 2172 destElem = (Element)(-1); 2173 fpscr.qc = 1; 2174 } 2175 FpscrQc = fpscr; 2176 ''' 2177 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode) 2178 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode) 2179 vsubhnCode = ''' 2180 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> 2181 (sizeof(Element) * 8); 2182 ''' 2183 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode) 2184 vrsubhnCode = ''' 2185 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + 2186 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 2187 (sizeof(Element) * 8); 2188 ''' 2189 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode) 2190 2191 vqaddSCode = ''' 2192 destElem = srcElem1 + srcElem2; 2193 FPSCR fpscr = (FPSCR) FpscrQc; 2194 bool negDest = (destElem < 0); 2195 bool negSrc1 = (srcElem1 < 0); 2196 bool negSrc2 = (srcElem2 < 0); 2197 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { 2198 if (negDest) 2199 /* If (>=0) plus (>=0) yields (<0), saturate to +. */ 2200 destElem = std::numeric_limits<Element>::max(); 2201 else 2202 /* If (<0) plus (<0) yields (>=0), saturate to -. 
*/ 2203 destElem = std::numeric_limits<Element>::min(); 2204 fpscr.qc = 1; 2205 } 2206 FpscrQc = fpscr; 2207 ''' 2208 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode) 2209 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode) 2210 2211 vqsubUCode = ''' 2212 destElem = srcElem1 - srcElem2; 2213 FPSCR fpscr = (FPSCR) FpscrQc; 2214 if (destElem > srcElem1) { 2215 destElem = 0; 2216 fpscr.qc = 1; 2217 } 2218 FpscrQc = fpscr; 2219 ''' 2220 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode) 2221 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode) 2222 2223 vqsubSCode = ''' 2224 destElem = srcElem1 - srcElem2; 2225 FPSCR fpscr = (FPSCR) FpscrQc; 2226 bool negDest = (destElem < 0); 2227 bool negSrc1 = (srcElem1 < 0); 2228 bool posSrc2 = (srcElem2 >= 0); 2229 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { 2230 if (negDest) 2231 /* If (>=0) minus (<0) yields (<0), saturate to +. */ 2232 destElem = std::numeric_limits<Element>::max(); 2233 else 2234 /* If (<0) minus (>=0) yields (>=0), saturate to -. */ 2235 destElem = std::numeric_limits<Element>::min(); 2236 fpscr.qc = 1; 2237 } 2238 FpscrQc = fpscr; 2239 ''' 2240 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode) 2241 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode) 2242 2243 vcgtCode = ''' 2244 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0; 2245 ''' 2246 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode) 2247 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode) 2248 2249 vcgeCode = ''' 2250 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0; 2251 ''' 2252 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode) 2253 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode) 2254 2255 vceqCode = ''' 2256 destElem = (srcElem1 == srcElem2) ? 
(Element)(-1) : 0; 2257 ''' 2258 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode) 2259 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode) 2260 2261 vshlCode = ''' 2262 int16_t shiftAmt = (int8_t)srcElem2; 2263 if (shiftAmt < 0) { 2264 shiftAmt = -shiftAmt; 2265 if (shiftAmt >= sizeof(Element) * 8) { 2266 shiftAmt = sizeof(Element) * 8 - 1; 2267 destElem = 0; 2268 } else { 2269 destElem = (srcElem1 >> shiftAmt); 2270 } 2271 // Make sure the right shift sign extended when it should. 2272 if (ltz(srcElem1) && !ltz(destElem)) { 2273 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2274 1 - shiftAmt)); 2275 } 2276 } else { 2277 if (shiftAmt >= sizeof(Element) * 8) { 2278 destElem = 0; 2279 } else { 2280 destElem = srcElem1 << shiftAmt; 2281 } 2282 } 2283 ''' 2284 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode) 2285 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode) 2286 2287 vrshlCode = ''' 2288 int16_t shiftAmt = (int8_t)srcElem2; 2289 if (shiftAmt < 0) { 2290 shiftAmt = -shiftAmt; 2291 Element rBit = 0; 2292 if (shiftAmt <= sizeof(Element) * 8) 2293 rBit = bits(srcElem1, shiftAmt - 1); 2294 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) 2295 rBit = 1; 2296 if (shiftAmt >= sizeof(Element) * 8) { 2297 shiftAmt = sizeof(Element) * 8 - 1; 2298 destElem = 0; 2299 } else { 2300 destElem = (srcElem1 >> shiftAmt); 2301 } 2302 // Make sure the right shift sign extended when it should. 
2303 if (ltz(srcElem1) && !ltz(destElem)) { 2304 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2305 1 - shiftAmt)); 2306 } 2307 destElem += rBit; 2308 } else if (shiftAmt > 0) { 2309 if (shiftAmt >= sizeof(Element) * 8) { 2310 destElem = 0; 2311 } else { 2312 destElem = srcElem1 << shiftAmt; 2313 } 2314 } else { 2315 destElem = srcElem1; 2316 } 2317 ''' 2318 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode) 2319 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode) 2320 2321 vqshlUCode = ''' 2322 int16_t shiftAmt = (int8_t)srcElem2; 2323 FPSCR fpscr = (FPSCR) FpscrQc; 2324 if (shiftAmt < 0) { 2325 shiftAmt = -shiftAmt; 2326 if (shiftAmt >= sizeof(Element) * 8) { 2327 shiftAmt = sizeof(Element) * 8 - 1; 2328 destElem = 0; 2329 } else { 2330 destElem = (srcElem1 >> shiftAmt); 2331 } 2332 } else if (shiftAmt > 0) { 2333 if (shiftAmt >= sizeof(Element) * 8) { 2334 if (srcElem1 != 0) { 2335 destElem = mask(sizeof(Element) * 8); 2336 fpscr.qc = 1; 2337 } else { 2338 destElem = 0; 2339 } 2340 } else { 2341 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2342 sizeof(Element) * 8 - shiftAmt)) { 2343 destElem = mask(sizeof(Element) * 8); 2344 fpscr.qc = 1; 2345 } else { 2346 destElem = srcElem1 << shiftAmt; 2347 } 2348 } 2349 } else { 2350 destElem = srcElem1; 2351 } 2352 FpscrQc = fpscr; 2353 ''' 2354 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode) 2355 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode) 2356 2357 vqshlSCode = ''' 2358 int16_t shiftAmt = (int8_t)srcElem2; 2359 FPSCR fpscr = (FPSCR) FpscrQc; 2360 if (shiftAmt < 0) { 2361 shiftAmt = -shiftAmt; 2362 if (shiftAmt >= sizeof(Element) * 8) { 2363 shiftAmt = sizeof(Element) * 8 - 1; 2364 destElem = 0; 2365 } else { 2366 destElem = (srcElem1 >> shiftAmt); 2367 } 2368 // Make sure the right shift sign extended when it should. 
2369 if (srcElem1 < 0 && destElem >= 0) { 2370 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2371 1 - shiftAmt)); 2372 } 2373 } else if (shiftAmt > 0) { 2374 bool sat = false; 2375 if (shiftAmt >= sizeof(Element) * 8) { 2376 if (srcElem1 != 0) 2377 sat = true; 2378 else 2379 destElem = 0; 2380 } else { 2381 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2382 sizeof(Element) * 8 - 1 - shiftAmt) != 2383 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2384 sat = true; 2385 } else { 2386 destElem = srcElem1 << shiftAmt; 2387 } 2388 } 2389 if (sat) { 2390 fpscr.qc = 1; 2391 destElem = mask(sizeof(Element) * 8 - 1); 2392 if (srcElem1 < 0) 2393 destElem = ~destElem; 2394 } 2395 } else { 2396 destElem = srcElem1; 2397 } 2398 FpscrQc = fpscr; 2399 ''' 2400 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode) 2401 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode) 2402 2403 vqrshlUCode = ''' 2404 int16_t shiftAmt = (int8_t)srcElem2; 2405 FPSCR fpscr = (FPSCR) FpscrQc; 2406 if (shiftAmt < 0) { 2407 shiftAmt = -shiftAmt; 2408 Element rBit = 0; 2409 if (shiftAmt <= sizeof(Element) * 8) 2410 rBit = bits(srcElem1, shiftAmt - 1); 2411 if (shiftAmt >= sizeof(Element) * 8) { 2412 shiftAmt = sizeof(Element) * 8 - 1; 2413 destElem = 0; 2414 } else { 2415 destElem = (srcElem1 >> shiftAmt); 2416 } 2417 destElem += rBit; 2418 } else { 2419 if (shiftAmt >= sizeof(Element) * 8) { 2420 if (srcElem1 != 0) { 2421 destElem = mask(sizeof(Element) * 8); 2422 fpscr.qc = 1; 2423 } else { 2424 destElem = 0; 2425 } 2426 } else { 2427 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2428 sizeof(Element) * 8 - shiftAmt)) { 2429 destElem = mask(sizeof(Element) * 8); 2430 fpscr.qc = 1; 2431 } else { 2432 destElem = srcElem1 << shiftAmt; 2433 } 2434 } 2435 } 2436 FpscrQc = fpscr; 2437 ''' 2438 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode) 2439 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, 
vqrshlUCode) 2440 2441 vqrshlSCode = ''' 2442 int16_t shiftAmt = (int8_t)srcElem2; 2443 FPSCR fpscr = (FPSCR) FpscrQc; 2444 if (shiftAmt < 0) { 2445 shiftAmt = -shiftAmt; 2446 Element rBit = 0; 2447 if (shiftAmt <= sizeof(Element) * 8) 2448 rBit = bits(srcElem1, shiftAmt - 1); 2449 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) 2450 rBit = 1; 2451 if (shiftAmt >= sizeof(Element) * 8) { 2452 shiftAmt = sizeof(Element) * 8 - 1; 2453 destElem = 0; 2454 } else { 2455 destElem = (srcElem1 >> shiftAmt); 2456 } 2457 // Make sure the right shift sign extended when it should. 2458 if (srcElem1 < 0 && destElem >= 0) { 2459 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2460 1 - shiftAmt)); 2461 } 2462 destElem += rBit; 2463 } else if (shiftAmt > 0) { 2464 bool sat = false; 2465 if (shiftAmt >= sizeof(Element) * 8) { 2466 if (srcElem1 != 0) 2467 sat = true; 2468 else 2469 destElem = 0; 2470 } else { 2471 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2472 sizeof(Element) * 8 - 1 - shiftAmt) != 2473 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2474 sat = true; 2475 } else { 2476 destElem = srcElem1 << shiftAmt; 2477 } 2478 } 2479 if (sat) { 2480 fpscr.qc = 1; 2481 destElem = mask(sizeof(Element) * 8 - 1); 2482 if (srcElem1 < 0) 2483 destElem = ~destElem; 2484 } 2485 } else { 2486 destElem = srcElem1; 2487 } 2488 FpscrQc = fpscr; 2489 ''' 2490 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode) 2491 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode) 2492 2493 vabaCode = ''' 2494 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : 2495 (srcElem2 - srcElem1); 2496 ''' 2497 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True) 2498 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True) 2499 vabalCode = ''' 2500 destElem += (srcElem1 > srcElem2) ? 
2501 ((BigElement)srcElem1 - (BigElement)srcElem2) : 2502 ((BigElement)srcElem2 - (BigElement)srcElem1); 2503 ''' 2504 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True) 2505 2506 vabdCode = ''' 2507 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : 2508 (srcElem2 - srcElem1); 2509 ''' 2510 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode) 2511 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode) 2512 vabdlCode = ''' 2513 destElem = (srcElem1 > srcElem2) ? 2514 ((BigElement)srcElem1 - (BigElement)srcElem2) : 2515 ((BigElement)srcElem2 - (BigElement)srcElem1); 2516 ''' 2517 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode) 2518 2519 vtstCode = ''' 2520 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0; 2521 ''' 2522 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode) 2523 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode) 2524 2525 vmulCode = ''' 2526 destElem = srcElem1 * srcElem2; 2527 ''' 2528 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode) 2529 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode) 2530 vmullCode = ''' 2531 destElem = (BigElement)srcElem1 * (BigElement)srcElem2; 2532 ''' 2533 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode) 2534 2535 vmlaCode = ''' 2536 destElem = destElem + srcElem1 * srcElem2; 2537 ''' 2538 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True) 2539 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True) 2540 vmlalCode = ''' 2541 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2; 2542 ''' 2543 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True) 2544 2545 vqdmlalCode = ''' 2546 FPSCR fpscr = (FPSCR) FpscrQc; 2547 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2548 Element maxNeg = 
std::numeric_limits<Element>::min(); 2549 Element halfNeg = maxNeg / 2; 2550 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2551 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2552 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2553 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2554 fpscr.qc = 1; 2555 } 2556 bool negPreDest = ltz(destElem); 2557 destElem += midElem; 2558 bool negDest = ltz(destElem); 2559 bool negMid = ltz(midElem); 2560 if (negPreDest == negMid && negMid != negDest) { 2561 destElem = mask(sizeof(BigElement) * 8 - 1); 2562 if (negPreDest) 2563 destElem = ~destElem; 2564 fpscr.qc = 1; 2565 } 2566 FpscrQc = fpscr; 2567 ''' 2568 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True) 2569 2570 vqdmlslCode = ''' 2571 FPSCR fpscr = (FPSCR) FpscrQc; 2572 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2573 Element maxNeg = std::numeric_limits<Element>::min(); 2574 Element halfNeg = maxNeg / 2; 2575 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2576 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2577 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2578 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2579 fpscr.qc = 1; 2580 } 2581 bool negPreDest = ltz(destElem); 2582 destElem -= midElem; 2583 bool negDest = ltz(destElem); 2584 bool posMid = ltz((BigElement)-midElem); 2585 if (negPreDest == posMid && posMid != negDest) { 2586 destElem = mask(sizeof(BigElement) * 8 - 1); 2587 if (negPreDest) 2588 destElem = ~destElem; 2589 fpscr.qc = 1; 2590 } 2591 FpscrQc = fpscr; 2592 ''' 2593 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True) 2594 2595 vqdmullCode = ''' 2596 FPSCR fpscr = (FPSCR) FpscrQc; 2597 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2598 if (srcElem1 == srcElem2 && 2599 srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2600 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); 2601 fpscr.qc = 1; 2602 } 2603 
FpscrQc = fpscr; 2604 ''' 2605 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode) 2606 2607 vmlsCode = ''' 2608 destElem = destElem - srcElem1 * srcElem2; 2609 ''' 2610 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True) 2611 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True) 2612 vmlslCode = ''' 2613 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2; 2614 ''' 2615 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True) 2616 2617 vmulpCode = ''' 2618 destElem = 0; 2619 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2620 if (bits(srcElem2, j)) 2621 destElem ^= srcElem1 << j; 2622 } 2623 ''' 2624 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode) 2625 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode) 2626 vmullpCode = ''' 2627 destElem = 0; 2628 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2629 if (bits(srcElem2, j)) 2630 destElem ^= (BigElement)srcElem1 << j; 2631 } 2632 ''' 2633 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode) 2634 2635 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True) 2636 2637 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True) 2638 2639 vqdmulhCode = ''' 2640 FPSCR fpscr = (FPSCR) FpscrQc; 2641 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2642 (sizeof(Element) * 8); 2643 if (srcElem1 == srcElem2 && 2644 srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2645 destElem = ~srcElem1; 2646 fpscr.qc = 1; 2647 } 2648 FpscrQc = fpscr; 2649 ''' 2650 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) 2651 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) 2652 2653 vqrdmulhCode = ''' 2654 FPSCR fpscr = (FPSCR) FpscrQc; 2655 destElem = (2 * 
(int64_t)srcElem1 * (int64_t)srcElem2 + 2656 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2657 (sizeof(Element) * 8); 2658 Element maxNeg = std::numeric_limits<Element>::min(); 2659 Element halfNeg = maxNeg / 2; 2660 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2661 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2662 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2663 if (destElem < 0) { 2664 destElem = mask(sizeof(Element) * 8 - 1); 2665 } else { 2666 destElem = std::numeric_limits<Element>::min(); 2667 } 2668 fpscr.qc = 1; 2669 } 2670 FpscrQc = fpscr; 2671 ''' 2672 threeEqualRegInst("vqrdmulh", "VqrdmulhD", 2673 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) 2674 threeEqualRegInst("vqrdmulh", "VqrdmulhQ", 2675 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) 2676 2677 vMinMaxFpCode = ''' 2678 destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr); 2679 ''' 2680 vMinMaxInsts = [ 2681 ("vmax", "VmaxDFp", 2, "Max", False, ), 2682 ("vmax", "VmaxQFp", 4, "Max", False, ), 2683 ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ), 2684 ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ), 2685 ("vpmax", "VpmaxDFp", 2, "Max", True, ), 2686 ("vpmax", "VpmaxQFp", 4, "Max", True, ), 2687 ("vmin", "VminDFp", 2, "Min", False, ), 2688 ("vmin", "VminQFp", 4, "Min", False, ), 2689 ("vminnm", "VminnmDFp", 2, "MinNum", False, ), 2690 ("vminnm", "VminnmQFp", 4, "MinNum", False, ), 2691 ("vpmin", "VpminDFp", 2, "Min", True, ), 2692 ("vpmin", "VpminQFp", 4, "Min", True, ), 2693 ] 2694 for name, Name, rCount, op, pairwise in vMinMaxInsts: 2695 threeEqualRegInst( 2696 name, 2697 Name, 2698 "SimdFloatCmpOp", 2699 ("uint32_t",), 2700 rCount, 2701 vMinMaxFpCode % op, 2702 pairwise=pairwise, 2703 standardFpcsr=True, 2704 ) 2705 2706 vaddfpCode = ''' 2707 FPSCR fpscr = (FPSCR) FpscrExc; 2708 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, 2709 true, true, VfpRoundNearest); 2710 FpscrExc = fpscr; 2711 ''' 2712 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode) 
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)

# Pairwise fp add reuses the element-wise add snippet.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)

# ---------------------------------------------------------------------
# Single-precision arithmetic. Every snippet reads the FPSCR exception
# bits from FpscrExc, runs the operation through binaryOp / ternaryOp
# with VfpRoundNearest, and writes the (possibly updated) bits back.
# ---------------------------------------------------------------------
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)

vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)

# Multiply-accumulate as two separate operations: the product goes through
# binaryOp on its own before it is added to the destination.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)

# Multiply-add as a single ternaryOp call using fpMulAdd<float>.
vfmafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
                        true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)

# Same as vfmafpCode with the first multiplicand negated.
vfmsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
                        true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)

# Multiply-subtract as two separate operations (product, then
# destination - product).
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)

# ---------------------------------------------------------------------
# Floating-point compares. The five compares share one body and differ
# only in the comparison helper handed to binaryOp, so the body is kept
# once and the helper name is substituted for %s. A helper result of 0
# produces all-ones (-1) in the destination, and a result of 2.0 raises
# fpscr.ioc; what the helpers' return codes stand for is defined where the
# v*Func helpers are implemented, not here.
# ---------------------------------------------------------------------
vfpCmpCodeTemplate = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, %s,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''

vcgtfpCode = vfpCmpCodeTemplate % "vcgtFunc"
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
        2, vcgtfpCode, toInt = True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
        4, vcgtfpCode, toInt = True)

vcgefpCode = vfpCmpCodeTemplate % "vcgeFunc"
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
        2, vcgefpCode, toInt = True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
        4, vcgefpCode, toInt = True)

vacgtfpCode = vfpCmpCodeTemplate % "vacgtFunc"
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
        2, vacgtfpCode, toInt = True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
        4, vacgtfpCode, toInt = True)

vacgefpCode = vfpCmpCodeTemplate % "vacgeFunc"
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
        2, vacgefpCode, toInt = True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
        4, vacgefpCode, toInt = True)

vceqfpCode = vfpCmpCodeTemplate % "vceqFunc"
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
        2, vceqfpCode, toInt = True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
        4, vceqfpCode, toInt = True)

# Reciprocal / reciprocal-square-root step operations, delegated to the
# fpRecpsS and fpRSqrtsS helpers.
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)

vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)

# Absolute difference: subtract, then take the magnitude.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)

# ---------------------------------------------------------------------
# Instantiations through the two*Reg* helpers. They reuse the multiply /
# multiply-accumulate snippets defined for the three-register forms.
# ---------------------------------------------------------------------
twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)

twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)

twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)

# ---------------------------------------------------------------------
# Shifts by immediate (imm).
# ---------------------------------------------------------------------
# Shift right: an immediate of at least the element width yields -1 for
# negative inputs and 0 otherwise, avoiding an over-wide C++ shift.
vshrCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        if (ltz(srcElem1))
            destElem = -1;
        else
            destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)

# Shift right and accumulate into the destination.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)

# Rounding shift right: the last bit shifted out (rBit) is added back. The
# shift is split into (imm - 1) and 1 so that imm equal to the element
# width never shifts by the full width in one step.
vrshrCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)

# Rounding shift right and accumulate.
vrsraCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem += 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem += srcElem1;
    }
'''
# Instantiate the rounding shift-right-accumulate forms (rCount 2 and 4)
# from the vrsraCode snippet defined immediately before this point.
for Name, rCount in (("NVrsraD", 2), ("NVrsraQ", 4)):
    twoRegShiftInst("vrsra", Name, "SimdShiftAccOp", allTypes, rCount,
                    vrsraCode, True)

# Shift right and insert: the shifted source fills the low bits while the
# top imm bits of the destination are preserved. With imm at or beyond the
# element width the destination is left as it is.
vsriCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = destElem;
    } else {
        destElem = (srcElem1 >> imm) |
            (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
for Name, rCount in (("NVsriD", 2), ("NVsriQ", 4)):
    twoRegShiftInst("vsri", Name, "SimdShiftOp", unsignedTypes, rCount,
                    vsriCode, True)

# Shift left by immediate. An over-wide immediate is performed as two
# shifts, (width - 1) followed by 1, so no single C++ shift reaches the
# element width.
vshlCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    } else {
        destElem = srcElem1 << imm;
    }
'''
for Name, rCount in (("NVshlD", 2), ("NVshlQ", 4)):
    twoRegShiftInst("vshl", Name, "SimdShiftOp", unsignedTypes, rCount,
                    vshlCode)

# Shift left and insert: the shifted source fills the high bits while the
# low imm bits of the destination are preserved.
vsliCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = destElem;
    } else {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
for Name, rCount in (("NVsliD", 2), ("NVsliQ", 4)):
    twoRegShiftInst("vsli", Name, "SimdShiftOp", unsignedTypes, rCount,
                    vsliCode, True)

# Saturating shift left by immediate on signed elements. Saturation picks
# the most negative value, inverted for positive inputs, and sets fpscr.qc.
# For 0 < imm < width it triggers when the top imm + 1 bits of the source
# are neither all zero nor all one.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = std::numeric_limits<Element>::min();
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1)) {
            destElem = std::numeric_limits<Element>::min();
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode) 3018 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode) 3019 3020 vqshluCode = ''' 3021 FPSCR fpscr = (FPSCR) FpscrQc; 3022 if (imm >= sizeof(Element) * 8) { 3023 if (srcElem1 != 0) { 3024 destElem = mask(sizeof(Element) * 8); 3025 fpscr.qc = 1; 3026 } else { 3027 destElem = 0; 3028 } 3029 } else if (imm) { 3030 destElem = (srcElem1 << imm); 3031 uint64_t topBits = bits((uint64_t)srcElem1, 3032 sizeof(Element) * 8 - 1, 3033 sizeof(Element) * 8 - imm); 3034 if (topBits != 0) { 3035 destElem = mask(sizeof(Element) * 8); 3036 fpscr.qc = 1; 3037 } 3038 } else { 3039 destElem = srcElem1; 3040 } 3041 FpscrQc = fpscr; 3042 ''' 3043 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode) 3044 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode) 3045 3046 vqshlusCode = ''' 3047 FPSCR fpscr = (FPSCR) FpscrQc; 3048 if (imm >= sizeof(Element) * 8) { 3049 if (srcElem1 < 0) { 3050 destElem = 0; 3051 fpscr.qc = 1; 3052 } else if (srcElem1 > 0) { 3053 destElem = mask(sizeof(Element) * 8); 3054 fpscr.qc = 1; 3055 } else { 3056 destElem = 0; 3057 } 3058 } else if (imm) { 3059 destElem = (srcElem1 << imm); 3060 uint64_t topBits = bits((uint64_t)srcElem1, 3061 sizeof(Element) * 8 - 1, 3062 sizeof(Element) * 8 - imm); 3063 if (srcElem1 < 0) { 3064 destElem = 0; 3065 fpscr.qc = 1; 3066 } else if (topBits != 0) { 3067 destElem = mask(sizeof(Element) * 8); 3068 fpscr.qc = 1; 3069 } 3070 } else { 3071 if (srcElem1 < 0) { 3072 fpscr.qc = 1; 3073 destElem = 0; 3074 } else { 3075 destElem = srcElem1; 3076 } 3077 } 3078 FpscrQc = fpscr; 3079 ''' 3080 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode) 3081 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode) 3082 3083 vshrnCode = ''' 3084 if (imm >= sizeof(srcElem1) * 8) { 3085 destElem = 0; 3086 } else { 3087 destElem = 
srcElem1 >> imm; 3088 } 3089 ''' 3090 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode) 3091 3092 vrshrnCode = ''' 3093 if (imm > sizeof(srcElem1) * 8) { 3094 destElem = 0; 3095 } else if (imm) { 3096 Element rBit = bits(srcElem1, imm - 1); 3097 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 3098 } else { 3099 destElem = srcElem1; 3100 } 3101 ''' 3102 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode) 3103 3104 vqshrnCode = ''' 3105 FPSCR fpscr = (FPSCR) FpscrQc; 3106 if (imm > sizeof(srcElem1) * 8) { 3107 if (srcElem1 != 0 && srcElem1 != -1) 3108 fpscr.qc = 1; 3109 destElem = 0; 3110 } else if (imm) { 3111 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3112 mid |= -(mid & ((BigElement)1 << 3113 (sizeof(BigElement) * 8 - 1 - imm))); 3114 if (mid != (Element)mid) { 3115 destElem = mask(sizeof(Element) * 8 - 1); 3116 if (srcElem1 < 0) 3117 destElem = ~destElem; 3118 fpscr.qc = 1; 3119 } else { 3120 destElem = mid; 3121 } 3122 } else { 3123 destElem = srcElem1; 3124 } 3125 FpscrQc = fpscr; 3126 ''' 3127 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode) 3128 3129 vqshrunCode = ''' 3130 FPSCR fpscr = (FPSCR) FpscrQc; 3131 if (imm > sizeof(srcElem1) * 8) { 3132 if (srcElem1 != 0) 3133 fpscr.qc = 1; 3134 destElem = 0; 3135 } else if (imm) { 3136 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3137 if (mid != (Element)mid) { 3138 destElem = mask(sizeof(Element) * 8); 3139 fpscr.qc = 1; 3140 } else { 3141 destElem = mid; 3142 } 3143 } else { 3144 destElem = srcElem1; 3145 } 3146 FpscrQc = fpscr; 3147 ''' 3148 twoRegNarrowShiftInst("vqshrun", "NVqshrun", 3149 "SimdShiftOp", smallUnsignedTypes, vqshrunCode) 3150 3151 vqshrunsCode = ''' 3152 FPSCR fpscr = (FPSCR) FpscrQc; 3153 if (imm > sizeof(srcElem1) * 8) { 3154 if (srcElem1 != 0) 3155 fpscr.qc = 1; 3156 destElem = 0; 3157 } else if (imm) { 3158 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3159 if 
(bits(mid, sizeof(BigElement) * 8 - 1, 3160 sizeof(Element) * 8) != 0) { 3161 if (srcElem1 < 0) { 3162 destElem = 0; 3163 } else { 3164 destElem = mask(sizeof(Element) * 8); 3165 } 3166 fpscr.qc = 1; 3167 } else { 3168 destElem = mid; 3169 } 3170 } else { 3171 destElem = srcElem1; 3172 } 3173 FpscrQc = fpscr; 3174 ''' 3175 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 3176 "SimdShiftOp", smallSignedTypes, vqshrunsCode) 3177 3178 vqrshrnCode = ''' 3179 FPSCR fpscr = (FPSCR) FpscrQc; 3180 if (imm > sizeof(srcElem1) * 8) { 3181 if (srcElem1 != 0 && srcElem1 != -1) 3182 fpscr.qc = 1; 3183 destElem = 0; 3184 } else if (imm) { 3185 BigElement mid = (srcElem1 >> (imm - 1)); 3186 uint64_t rBit = mid & 0x1; 3187 mid >>= 1; 3188 mid |= -(mid & ((BigElement)1 << 3189 (sizeof(BigElement) * 8 - 1 - imm))); 3190 mid += rBit; 3191 if (mid != (Element)mid) { 3192 destElem = mask(sizeof(Element) * 8 - 1); 3193 if (srcElem1 < 0) 3194 destElem = ~destElem; 3195 fpscr.qc = 1; 3196 } else { 3197 destElem = mid; 3198 } 3199 } else { 3200 if (srcElem1 != (Element)srcElem1) { 3201 destElem = mask(sizeof(Element) * 8 - 1); 3202 if (srcElem1 < 0) 3203 destElem = ~destElem; 3204 fpscr.qc = 1; 3205 } else { 3206 destElem = srcElem1; 3207 } 3208 } 3209 FpscrQc = fpscr; 3210 ''' 3211 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 3212 "SimdShiftOp", smallSignedTypes, vqrshrnCode) 3213 3214 vqrshrunCode = ''' 3215 FPSCR fpscr = (FPSCR) FpscrQc; 3216 if (imm > sizeof(srcElem1) * 8) { 3217 if (srcElem1 != 0) 3218 fpscr.qc = 1; 3219 destElem = 0; 3220 } else if (imm) { 3221 BigElement mid = (srcElem1 >> (imm - 1)); 3222 uint64_t rBit = mid & 0x1; 3223 mid >>= 1; 3224 mid += rBit; 3225 if (mid != (Element)mid) { 3226 destElem = mask(sizeof(Element) * 8); 3227 fpscr.qc = 1; 3228 } else { 3229 destElem = mid; 3230 } 3231 } else { 3232 if (srcElem1 != (Element)srcElem1) { 3233 destElem = mask(sizeof(Element) * 8 - 1); 3234 fpscr.qc = 1; 3235 } else { 3236 destElem = srcElem1; 3237 } 3238 } 3239 FpscrQc = 
fpscr; 3240 ''' 3241 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 3242 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) 3243 3244 vqrshrunsCode = ''' 3245 FPSCR fpscr = (FPSCR) FpscrQc; 3246 if (imm > sizeof(srcElem1) * 8) { 3247 if (srcElem1 != 0) 3248 fpscr.qc = 1; 3249 destElem = 0; 3250 } else if (imm) { 3251 BigElement mid = (srcElem1 >> (imm - 1)); 3252 uint64_t rBit = mid & 0x1; 3253 mid >>= 1; 3254 mid |= -(mid & ((BigElement)1 << 3255 (sizeof(BigElement) * 8 - 1 - imm))); 3256 mid += rBit; 3257 if (bits(mid, sizeof(BigElement) * 8 - 1, 3258 sizeof(Element) * 8) != 0) { 3259 if (srcElem1 < 0) { 3260 destElem = 0; 3261 } else { 3262 destElem = mask(sizeof(Element) * 8); 3263 } 3264 fpscr.qc = 1; 3265 } else { 3266 destElem = mid; 3267 } 3268 } else { 3269 if (srcElem1 < 0) { 3270 fpscr.qc = 1; 3271 destElem = 0; 3272 } else { 3273 destElem = srcElem1; 3274 } 3275 } 3276 FpscrQc = fpscr; 3277 ''' 3278 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 3279 "SimdShiftOp", smallSignedTypes, vqrshrunsCode) 3280 3281 vshllCode = ''' 3282 if (imm >= sizeof(destElem) * 8) { 3283 destElem = 0; 3284 } else { 3285 destElem = (BigElement)srcElem1 << imm; 3286 } 3287 ''' 3288 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode) 3289 3290 vmovlCode = ''' 3291 destElem = srcElem1; 3292 ''' 3293 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) 3294 3295 vcvt2ufxCode = ''' 3296 FPSCR fpscr = (FPSCR) FpscrExc; 3297 if (flushToZero(srcElem1)) 3298 fpscr.idc = 1; 3299 VfpSavedState state = prepFpState(VfpRoundNearest); 3300 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 3301 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); 3302 __asm__ __volatile__("" :: "m" (destReg)); 3303 finishVfp(fpscr, state, true); 3304 FpscrExc = fpscr; 3305 ''' 3306 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 3307 2, vcvt2ufxCode, toInt = True) 3308 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",), 
3309 4, vcvt2ufxCode, toInt = True) 3310 3311 vcvt2sfxCode = ''' 3312 FPSCR fpscr = (FPSCR) FpscrExc; 3313 if (flushToZero(srcElem1)) 3314 fpscr.idc = 1; 3315 VfpSavedState state = prepFpState(VfpRoundNearest); 3316 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 3317 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm); 3318 __asm__ __volatile__("" :: "m" (destReg)); 3319 finishVfp(fpscr, state, true); 3320 FpscrExc = fpscr; 3321 ''' 3322 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), 3323 2, vcvt2sfxCode, toInt = True) 3324 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",), 3325 4, vcvt2sfxCode, toInt = True) 3326 3327 vcvtu2fpCode = ''' 3328 FPSCR fpscr = (FPSCR) FpscrExc; 3329 VfpSavedState state = prepFpState(VfpRoundNearest); 3330 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 3331 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm); 3332 __asm__ __volatile__("" :: "m" (destElem)); 3333 finishVfp(fpscr, state, true); 3334 FpscrExc = fpscr; 3335 ''' 3336 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), 3337 2, vcvtu2fpCode, fromInt = True) 3338 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",), 3339 4, vcvtu2fpCode, fromInt = True) 3340 3341 vcvts2fpCode = ''' 3342 FPSCR fpscr = (FPSCR) FpscrExc; 3343 VfpSavedState state = prepFpState(VfpRoundNearest); 3344 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); 3345 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm); 3346 __asm__ __volatile__("" :: "m" (destElem)); 3347 finishVfp(fpscr, state, true); 3348 FpscrExc = fpscr; 3349 ''' 3350 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), 3351 2, vcvts2fpCode, fromInt = True) 3352 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",), 3353 4, vcvts2fpCode, fromInt = True) 3354 3355 vcvts2hCode = ''' 3356 destElem = 0; 3357 FPSCR fpscr = (FPSCR) FpscrExc; 3358 float srcFp1 = bitsToFp(srcElem1, (float)0.0); 3359 if (flushToZero(srcFp1)) 3360 fpscr.idc 
= 1; 3361 VfpSavedState state = prepFpState(VfpRoundNearest); 3362 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem) 3363 : "m" (srcFp1), "m" (destElem)); 3364 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, 3365 fpscr.ahp, srcFp1); 3366 __asm__ __volatile__("" :: "m" (destElem)); 3367 finishVfp(fpscr, state, true); 3368 FpscrExc = fpscr; 3369 ''' 3370 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) 3371 3372 vcvth2sCode = ''' 3373 destElem = 0; 3374 FPSCR fpscr = (FPSCR) FpscrExc; 3375 VfpSavedState state = prepFpState(VfpRoundNearest); 3376 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) 3377 : "m" (srcElem1), "m" (destElem)); 3378 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); 3379 __asm__ __volatile__("" :: "m" (destElem)); 3380 finishVfp(fpscr, state, true); 3381 FpscrExc = fpscr; 3382 ''' 3383 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) 3384 3385 vrsqrteCode = ''' 3386 destElem = unsignedRSqrtEstimate(srcElem1); 3387 ''' 3388 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode) 3389 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) 3390 3391 vrsqrtefpCode = ''' 3392 FPSCR fpscr = (FPSCR) FpscrExc; 3393 if (flushToZero(srcReg1)) 3394 fpscr.idc = 1; 3395 destReg = fprSqrtEstimate(fpscr, srcReg1); 3396 FpscrExc = fpscr; 3397 ''' 3398 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) 3399 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) 3400 3401 vrecpeCode = ''' 3402 destElem = unsignedRecipEstimate(srcElem1); 3403 ''' 3404 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode) 3405 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) 3406 3407 vrecpefpCode = ''' 3408 FPSCR fpscr = (FPSCR) FpscrExc; 3409 if (flushToZero(srcReg1)) 3410 
fpscr.idc = 1; 3411 destReg = fpRecipEstimate(fpscr, srcReg1); 3412 FpscrExc = fpscr; 3413 ''' 3414 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) 3415 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) 3416 3417 vrev16Code = ''' 3418 destElem = srcElem1; 3419 unsigned groupSize = ((1 << 1) / sizeof(Element)); 3420 unsigned reverseMask = (groupSize - 1); 3421 j = i ^ reverseMask; 3422 ''' 3423 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code) 3424 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code) 3425 vrev32Code = ''' 3426 destElem = srcElem1; 3427 unsigned groupSize = ((1 << 2) / sizeof(Element)); 3428 unsigned reverseMask = (groupSize - 1); 3429 j = i ^ reverseMask; 3430 ''' 3431 twoRegMiscInst("vrev32", "NVrev32D", 3432 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code) 3433 twoRegMiscInst("vrev32", "NVrev32Q", 3434 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code) 3435 vrev64Code = ''' 3436 destElem = srcElem1; 3437 unsigned groupSize = ((1 << 3) / sizeof(Element)); 3438 unsigned reverseMask = (groupSize - 1); 3439 j = i ^ reverseMask; 3440 ''' 3441 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code) 3442 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code) 3443 3444 split('exec') 3445 exec_output += vcompares + vcomparesL 3446 3447 vpaddlCode = ''' 3448 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 3449 ''' 3450 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode) 3451 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode) 3452 3453 vpadalCode = ''' 3454 destElem += (BigElement)srcElem1 + (BigElement)srcElem2; 3455 ''' 3456 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True) 3457 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, 
vpadalCode, True) 3458 3459 vclsCode = ''' 3460 unsigned count = 0; 3461 if (srcElem1 < 0) { 3462 srcElem1 <<= 1; 3463 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 3464 count++; 3465 srcElem1 <<= 1; 3466 } 3467 } else { 3468 srcElem1 <<= 1; 3469 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 3470 count++; 3471 srcElem1 <<= 1; 3472 } 3473 } 3474 destElem = count; 3475 ''' 3476 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode) 3477 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode) 3478 3479 vclzCode = ''' 3480 unsigned count = 0; 3481 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 3482 count++; 3483 srcElem1 <<= 1; 3484 } 3485 destElem = count; 3486 ''' 3487 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode) 3488 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode) 3489 3490 vcntCode = ''' 3491 unsigned count = 0; 3492 while (srcElem1 && count < sizeof(Element) * 8) { 3493 count += srcElem1 & 0x1; 3494 srcElem1 >>= 1; 3495 } 3496 destElem = count; 3497 ''' 3498 3499 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode) 3500 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode) 3501 3502 vmvnCode = ''' 3503 destElem = ~srcElem1; 3504 ''' 3505 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 3506 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 3507 3508 vqabsCode = ''' 3509 FPSCR fpscr = (FPSCR) FpscrQc; 3510 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 3511 fpscr.qc = 1; 3512 destElem = ~srcElem1; 3513 } else if (srcElem1 < 0) { 3514 destElem = -srcElem1; 3515 } else { 3516 destElem = srcElem1; 3517 } 3518 FpscrQc = fpscr; 3519 ''' 3520 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) 3521 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) 3522 3523 vqnegCode = ''' 3524 FPSCR fpscr = (FPSCR) 
FpscrQc; 3525 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 3526 fpscr.qc = 1; 3527 destElem = ~srcElem1; 3528 } else { 3529 destElem = -srcElem1; 3530 } 3531 FpscrQc = fpscr; 3532 ''' 3533 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) 3534 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) 3535 3536 vabsCode = ''' 3537 if (srcElem1 < 0) { 3538 destElem = -srcElem1; 3539 } else { 3540 destElem = srcElem1; 3541 } 3542 ''' 3543 3544 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode) 3545 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode) 3546 vabsfpCode = ''' 3547 union 3548 { 3549 uint32_t i; 3550 float f; 3551 } cStruct; 3552 cStruct.f = srcReg1; 3553 cStruct.i &= mask(sizeof(Element) * 8 - 1); 3554 destReg = cStruct.f; 3555 ''' 3556 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode) 3557 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode) 3558 3559 vnegCode = ''' 3560 destElem = -srcElem1; 3561 ''' 3562 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode) 3563 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode) 3564 vnegfpCode = ''' 3565 destReg = -srcReg1; 3566 ''' 3567 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode) 3568 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode) 3569 3570 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;' 3571 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) 3572 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) 3573 vcgtfpCode = ''' 3574 FPSCR fpscr = (FPSCR) FpscrExc; 3575 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc, 3576 true, true, VfpRoundNearest); 3577 destReg = (res == 0) ? 
-1 : 0; 3578 if (res == 2.0) 3579 fpscr.ioc = 1; 3580 FpscrExc = fpscr; 3581 ''' 3582 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), 3583 2, vcgtfpCode, toInt = True) 3584 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",), 3585 4, vcgtfpCode, toInt = True) 3586 3587 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;' 3588 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) 3589 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) 3590 vcgefpCode = ''' 3591 FPSCR fpscr = (FPSCR) FpscrExc; 3592 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc, 3593 true, true, VfpRoundNearest); 3594 destReg = (res == 0) ? -1 : 0; 3595 if (res == 2.0) 3596 fpscr.ioc = 1; 3597 FpscrExc = fpscr; 3598 ''' 3599 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), 3600 2, vcgefpCode, toInt = True) 3601 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",), 3602 4, vcgefpCode, toInt = True) 3603 3604 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;' 3605 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) 3606 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) 3607 vceqfpCode = ''' 3608 FPSCR fpscr = (FPSCR) FpscrExc; 3609 float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc, 3610 true, true, VfpRoundNearest); 3611 destReg = (res == 0) ? -1 : 0; 3612 if (res == 2.0) 3613 fpscr.ioc = 1; 3614 FpscrExc = fpscr; 3615 ''' 3616 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), 3617 2, vceqfpCode, toInt = True) 3618 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",), 3619 4, vceqfpCode, toInt = True) 3620 3621 vcleCode = 'destElem = (srcElem1 <= 0) ? 
mask(sizeof(Element) * 8) : 0;' 3622 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) 3623 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) 3624 vclefpCode = ''' 3625 FPSCR fpscr = (FPSCR) FpscrExc; 3626 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc, 3627 true, true, VfpRoundNearest); 3628 destReg = (res == 0) ? -1 : 0; 3629 if (res == 2.0) 3630 fpscr.ioc = 1; 3631 FpscrExc = fpscr; 3632 ''' 3633 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), 3634 2, vclefpCode, toInt = True) 3635 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",), 3636 4, vclefpCode, toInt = True) 3637 3638 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;' 3639 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) 3640 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) 3641 vcltfpCode = ''' 3642 FPSCR fpscr = (FPSCR) FpscrExc; 3643 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc, 3644 true, true, VfpRoundNearest); 3645 destReg = (res == 0) ? 
-1 : 0; 3646 if (res == 2.0) 3647 fpscr.ioc = 1; 3648 FpscrExc = fpscr; 3649 ''' 3650 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 3651 2, vcltfpCode, toInt = True) 3652 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",), 3653 4, vcltfpCode, toInt = True) 3654 3655 vswpCode = ''' 3656 uint32_t mid; 3657 for (unsigned r = 0; r < rCount; r++) { 3658 mid = srcReg1.regs[r]; 3659 srcReg1.regs[r] = destReg.regs[r]; 3660 destReg.regs[r] = mid; 3661 } 3662 ''' 3663 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode) 3664 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode) 3665 3666 vtrnCode = ''' 3667 Element mid; 3668 for (unsigned i = 0; i < eCount; i += 2) { 3669 mid = srcReg1.elements[i]; 3670 srcReg1.elements[i] = destReg.elements[i + 1]; 3671 destReg.elements[i + 1] = mid; 3672 } 3673 ''' 3674 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", 3675 smallUnsignedTypes, 2, vtrnCode) 3676 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", 3677 smallUnsignedTypes, 4, vtrnCode) 3678 3679 vuzpCode = ''' 3680 Element mid[eCount]; 3681 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3682 for (unsigned i = 0; i < eCount / 2; i++) { 3683 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3684 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3685 destReg.elements[i] = destReg.elements[2 * i]; 3686 } 3687 for (unsigned i = 0; i < eCount / 2; i++) { 3688 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3689 } 3690 ''' 3691 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode) 3692 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode) 3693 3694 vzipCode = ''' 3695 Element mid[eCount]; 3696 memcpy(&mid, &destReg, sizeof(destReg)); 3697 for (unsigned i = 0; i < eCount / 2; i++) { 3698 destReg.elements[2 * i] = mid[i]; 3699 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3700 } 3701 for (int i = 0; i < eCount / 2; i++) { 3702 srcReg1.elements[2 * i] = 
mid[eCount / 2 + i]; 3703 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3704 } 3705 ''' 3706 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode) 3707 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode) 3708 3709 vmovnCode = 'destElem = srcElem1;' 3710 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode) 3711 3712 vdupCode = 'destElem = srcElem1;' 3713 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode) 3714 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode) 3715 3716 def vdupGprInst(name, Name, opClass, types, rCount): 3717 global header_output, exec_output 3718 eWalkCode = simdEnabledCheckCode + ''' 3719 RegVect destReg; 3720 for (unsigned i = 0; i < eCount; i++) { 3721 destReg.elements[i] = htog((Element)Op1); 3722 } 3723 ''' 3724 for reg in range(rCount): 3725 eWalkCode += ''' 3726 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3727 ''' % { "reg" : reg } 3728 iop = InstObjParams(name, Name, 3729 "RegRegOp", 3730 { "code": eWalkCode, 3731 "r_count": rCount, 3732 "predicate_test": predicateTest, 3733 "op_class": opClass }, []) 3734 header_output += NeonRegRegOpDeclare.subst(iop) 3735 exec_output += NeonEqualRegExecute.subst(iop) 3736 for type in types: 3737 substDict = { "targs" : type, 3738 "class_name" : Name } 3739 exec_output += NeonExecDeclare.subst(substDict) 3740 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2) 3741 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4) 3742 3743 vmovCode = 'destElem = imm;' 3744 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode) 3745 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode) 3746 3747 vorrCode = 'destElem |= imm;' 3748 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True) 3749 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, 
vorrCode, True) 3750 3751 vmvnCode = 'destElem = ~imm;' 3752 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 3753 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 3754 3755 vbicCode = 'destElem &= ~imm;' 3756 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True) 3757 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) 3758 3759 vqmovnCode = ''' 3760 FPSCR fpscr = (FPSCR) FpscrQc; 3761 destElem = srcElem1; 3762 if ((BigElement)destElem != srcElem1) { 3763 fpscr.qc = 1; 3764 destElem = mask(sizeof(Element) * 8 - 1); 3765 if (srcElem1 < 0) 3766 destElem = ~destElem; 3767 } 3768 FpscrQc = fpscr; 3769 ''' 3770 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) 3771 3772 vqmovunCode = ''' 3773 FPSCR fpscr = (FPSCR) FpscrQc; 3774 destElem = srcElem1; 3775 if ((BigElement)destElem != srcElem1) { 3776 fpscr.qc = 1; 3777 destElem = mask(sizeof(Element) * 8); 3778 } 3779 FpscrQc = fpscr; 3780 ''' 3781 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3782 "SimdMiscOp", smallUnsignedTypes, vqmovunCode) 3783 3784 vqmovunsCode = ''' 3785 FPSCR fpscr = (FPSCR) FpscrQc; 3786 destElem = srcElem1; 3787 if (srcElem1 < 0 || 3788 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3789 fpscr.qc = 1; 3790 destElem = mask(sizeof(Element) * 8); 3791 if (srcElem1 < 0) 3792 destElem = ~destElem; 3793 } 3794 FpscrQc = fpscr; 3795 ''' 3796 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3797 "SimdMiscOp", smallSignedTypes, vqmovunsCode) 3798 3799 def buildVext(name, Name, opClass, types, rCount, op): 3800 global header_output, exec_output 3801 eWalkCode = simdEnabledCheckCode + ''' 3802 RegVect srcReg1, srcReg2, destReg; 3803 ''' 3804 for reg in range(rCount): 3805 eWalkCode += ''' 3806 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 3807 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 3808 ''' % { "reg" : reg } 3809 eWalkCode += op 3810 for reg in 
range(rCount): 3811 eWalkCode += ''' 3812 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3813 ''' % { "reg" : reg } 3814 iop = InstObjParams(name, Name, 3815 "RegRegRegImmOp", 3816 { "code": eWalkCode, 3817 "r_count": rCount, 3818 "predicate_test": predicateTest, 3819 "op_class": opClass }, []) 3820 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3821 exec_output += NeonEqualRegExecute.subst(iop) 3822 for type in types: 3823 substDict = { "targs" : type, 3824 "class_name" : Name } 3825 exec_output += NeonExecDeclare.subst(substDict) 3826 3827 vextCode = ''' 3828 for (unsigned i = 0; i < eCount; i++) { 3829 unsigned index = i + imm; 3830 if (index < eCount) { 3831 destReg.elements[i] = srcReg1.elements[index]; 3832 } else { 3833 index -= eCount; 3834 if (index >= eCount) { 3835 fault = std::make_shared<UndefinedInstruction>(machInst, 3836 false, 3837 mnemonic); 3838 } else { 3839 destReg.elements[i] = srcReg2.elements[index]; 3840 } 3841 } 3842 } 3843 ''' 3844 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode) 3845 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode) 3846 3847 def buildVtbxl(name, Name, opClass, length, isVtbl): 3848 global header_output, decoder_output, exec_output 3849 code = simdEnabledCheckCode + ''' 3850 union 3851 { 3852 uint8_t bytes[32]; 3853 uint32_t regs[8]; 3854 } table; 3855 3856 union 3857 { 3858 uint8_t bytes[8]; 3859 uint32_t regs[2]; 3860 } destReg, srcReg2; 3861 3862 const unsigned length = %(length)d; 3863 const bool isVtbl = %(isVtbl)s; 3864 3865 srcReg2.regs[0] = htog(FpOp2P0_uw); 3866 srcReg2.regs[1] = htog(FpOp2P1_uw); 3867 3868 destReg.regs[0] = htog(FpDestP0_uw); 3869 destReg.regs[1] = htog(FpDestP1_uw); 3870 ''' % { "length" : length, "isVtbl" : isVtbl } 3871 for reg in range(8): 3872 if reg < length * 2: 3873 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \ 3874 { "reg" : reg } 3875 else: 3876 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3877 code += ''' 3878 
for (unsigned i = 0; i < sizeof(destReg); i++) { 3879 uint8_t index = srcReg2.bytes[i]; 3880 if (index < 8 * length) { 3881 destReg.bytes[i] = table.bytes[index]; 3882 } else { 3883 if (isVtbl) 3884 destReg.bytes[i] = 0; 3885 // else destReg.bytes[i] unchanged 3886 } 3887 } 3888 3889 FpDestP0_uw = gtoh(destReg.regs[0]); 3890 FpDestP1_uw = gtoh(destReg.regs[1]); 3891 ''' 3892 iop = InstObjParams(name, Name, 3893 "RegRegRegOp", 3894 { "code": code, 3895 "predicate_test": predicateTest, 3896 "op_class": opClass }, []) 3897 header_output += RegRegRegOpDeclare.subst(iop) 3898 decoder_output += RegRegRegOpConstructor.subst(iop) 3899 exec_output += PredOpExecute.subst(iop) 3900 3901 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true") 3902 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true") 3903 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true") 3904 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true") 3905 3906 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false") 3907 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false") 3908 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false") 3909 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false") 3910}}; 3911