neon.isa revision 13978
// -*- mode:c++ -*-

// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Gabe Black 39 40output header {{ 41 template <template <typename T> class Base> 42 StaticInstPtr 43 decodeNeonUThreeUReg(unsigned size, 44 ExtMachInst machInst, IntRegIndex dest, 45 IntRegIndex op1, IntRegIndex op2) 46 { 47 switch (size) { 48 case 0: 49 return new Base<uint8_t>(machInst, dest, op1, op2); 50 case 1: 51 return new Base<uint16_t>(machInst, dest, op1, op2); 52 case 2: 53 return new Base<uint32_t>(machInst, dest, op1, op2); 54 case 3: 55 return new Base<uint64_t>(machInst, dest, op1, op2); 56 default: 57 return new Unknown(machInst); 58 } 59 } 60 61 template <template <typename T> class Base> 62 StaticInstPtr 63 decodeNeonSThreeUReg(unsigned size, 64 ExtMachInst machInst, IntRegIndex dest, 65 IntRegIndex op1, IntRegIndex op2) 66 { 67 switch (size) { 68 case 0: 69 return new Base<int8_t>(machInst, dest, op1, op2); 70 case 1: 71 return new Base<int16_t>(machInst, dest, op1, op2); 72 case 2: 73 return new Base<int32_t>(machInst, dest, op1, op2); 74 case 3: 75 return new Base<int64_t>(machInst, dest, op1, op2); 76 default: 77 return new Unknown(machInst); 78 } 79 } 80 81 template <template <typename T> class Base> 82 StaticInstPtr 83 decodeNeonUSThreeUReg(bool notSigned, unsigned size, 84 ExtMachInst machInst, IntRegIndex dest, 85 IntRegIndex op1, IntRegIndex op2) 86 { 87 if (notSigned) { 88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2); 89 } else { 90 return 
decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2); 91 } 92 } 93 94 template <template <typename T> class Base> 95 StaticInstPtr 96 decodeNeonUThreeUSReg(unsigned size, 97 ExtMachInst machInst, IntRegIndex dest, 98 IntRegIndex op1, IntRegIndex op2) 99 { 100 switch (size) { 101 case 0: 102 return new Base<uint8_t>(machInst, dest, op1, op2); 103 case 1: 104 return new Base<uint16_t>(machInst, dest, op1, op2); 105 case 2: 106 return new Base<uint32_t>(machInst, dest, op1, op2); 107 default: 108 return new Unknown(machInst); 109 } 110 } 111 112 template <template <typename T> class Base> 113 StaticInstPtr 114 decodeNeonSThreeUSReg(unsigned size, 115 ExtMachInst machInst, IntRegIndex dest, 116 IntRegIndex op1, IntRegIndex op2) 117 { 118 switch (size) { 119 case 0: 120 return new Base<int8_t>(machInst, dest, op1, op2); 121 case 1: 122 return new Base<int16_t>(machInst, dest, op1, op2); 123 case 2: 124 return new Base<int32_t>(machInst, dest, op1, op2); 125 default: 126 return new Unknown(machInst); 127 } 128 } 129 130 template <template <typename T> class Base> 131 StaticInstPtr 132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, 133 IntRegIndex dest, IntRegIndex op1, 134 IntRegIndex op2) 135 { 136 switch (size) { 137 case 1: 138 return new Base<int16_t>(machInst, dest, op1, op2); 139 case 2: 140 return new Base<int32_t>(machInst, dest, op1, op2); 141 default: 142 return new Unknown(machInst); 143 } 144 } 145 146 template <template <typename T> class Base> 147 StaticInstPtr 148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, 149 IntRegIndex dest, IntRegIndex op1, 150 IntRegIndex op2, uint64_t imm) 151 { 152 switch (size) { 153 case 1: 154 return new Base<int16_t>(machInst, dest, op1, op2, imm); 155 case 2: 156 return new Base<int32_t>(machInst, dest, op1, op2, imm); 157 default: 158 return new Unknown(machInst); 159 } 160 } 161 162 template <template <typename T> class Base> 163 StaticInstPtr 164 decodeNeonUSThreeUSReg(bool 
notSigned, unsigned size, 165 ExtMachInst machInst, IntRegIndex dest, 166 IntRegIndex op1, IntRegIndex op2) 167 { 168 if (notSigned) { 169 return decodeNeonUThreeUSReg<Base>( 170 size, machInst, dest, op1, op2); 171 } else { 172 return decodeNeonSThreeUSReg<Base>( 173 size, machInst, dest, op1, op2); 174 } 175 } 176 177 template <template <typename T> class BaseD, 178 template <typename T> class BaseQ> 179 StaticInstPtr 180 decodeNeonUThreeSReg(bool q, unsigned size, 181 ExtMachInst machInst, IntRegIndex dest, 182 IntRegIndex op1, IntRegIndex op2) 183 { 184 if (q) { 185 return decodeNeonUThreeUSReg<BaseQ>( 186 size, machInst, dest, op1, op2); 187 } else { 188 return decodeNeonUThreeUSReg<BaseD>( 189 size, machInst, dest, op1, op2); 190 } 191 } 192 193 template <template <typename T> class BaseD, 194 template <typename T> class BaseQ> 195 StaticInstPtr 196 decodeNeonSThreeSReg(bool q, unsigned size, 197 ExtMachInst machInst, IntRegIndex dest, 198 IntRegIndex op1, IntRegIndex op2) 199 { 200 if (q) { 201 return decodeNeonSThreeUSReg<BaseQ>( 202 size, machInst, dest, op1, op2); 203 } else { 204 return decodeNeonSThreeUSReg<BaseD>( 205 size, machInst, dest, op1, op2); 206 } 207 } 208 209 template <template <typename T> class BaseD, 210 template <typename T> class BaseQ> 211 StaticInstPtr 212 decodeNeonSThreeXReg(bool q, unsigned size, 213 ExtMachInst machInst, IntRegIndex dest, 214 IntRegIndex op1, IntRegIndex op2) 215 { 216 if (q) { 217 return decodeNeonSThreeUReg<BaseQ>( 218 size, machInst, dest, op1, op2); 219 } else { 220 return decodeNeonSThreeUSReg<BaseD>( 221 size, machInst, dest, op1, op2); 222 } 223 } 224 225 template <template <typename T> class BaseD, 226 template <typename T> class BaseQ> 227 StaticInstPtr 228 decodeNeonUThreeXReg(bool q, unsigned size, 229 ExtMachInst machInst, IntRegIndex dest, 230 IntRegIndex op1, IntRegIndex op2) 231 { 232 if (q) { 233 return decodeNeonUThreeUReg<BaseQ>( 234 size, machInst, dest, op1, op2); 235 } else { 236 return 
decodeNeonUThreeUSReg<BaseD>( 237 size, machInst, dest, op1, op2); 238 } 239 } 240 241 template <template <typename T> class BaseD, 242 template <typename T> class BaseQ> 243 StaticInstPtr 244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, 245 ExtMachInst machInst, IntRegIndex dest, 246 IntRegIndex op1, IntRegIndex op2) 247 { 248 if (notSigned) { 249 return decodeNeonUThreeSReg<BaseD, BaseQ>( 250 q, size, machInst, dest, op1, op2); 251 } else { 252 return decodeNeonSThreeSReg<BaseD, BaseQ>( 253 q, size, machInst, dest, op1, op2); 254 } 255 } 256 257 template <template <typename T> class BaseD, 258 template <typename T> class BaseQ> 259 StaticInstPtr 260 decodeNeonUThreeReg(bool q, unsigned size, 261 ExtMachInst machInst, IntRegIndex dest, 262 IntRegIndex op1, IntRegIndex op2) 263 { 264 if (q) { 265 return decodeNeonUThreeUReg<BaseQ>( 266 size, machInst, dest, op1, op2); 267 } else { 268 return decodeNeonUThreeUReg<BaseD>( 269 size, machInst, dest, op1, op2); 270 } 271 } 272 273 template <template <typename T> class BaseD, 274 template <typename T> class BaseQ> 275 StaticInstPtr 276 decodeNeonSThreeReg(bool q, unsigned size, 277 ExtMachInst machInst, IntRegIndex dest, 278 IntRegIndex op1, IntRegIndex op2) 279 { 280 if (q) { 281 return decodeNeonSThreeUReg<BaseQ>( 282 size, machInst, dest, op1, op2); 283 } else { 284 return decodeNeonSThreeUReg<BaseD>( 285 size, machInst, dest, op1, op2); 286 } 287 } 288 289 template <template <typename T> class BaseD, 290 template <typename T> class BaseQ> 291 StaticInstPtr 292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size, 293 ExtMachInst machInst, IntRegIndex dest, 294 IntRegIndex op1, IntRegIndex op2) 295 { 296 if (notSigned) { 297 return decodeNeonUThreeReg<BaseD, BaseQ>( 298 q, size, machInst, dest, op1, op2); 299 } else { 300 return decodeNeonSThreeReg<BaseD, BaseQ>( 301 q, size, machInst, dest, op1, op2); 302 } 303 } 304 305 template <template <typename T> class BaseD, 306 template <typename T> 
class BaseQ> 307 StaticInstPtr 308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, 309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) 310 { 311 if (q) { 312 if (size) 313 return new BaseQ<uint64_t>(machInst, dest, op1, op2); 314 else 315 return new BaseQ<uint32_t>(machInst, dest, op1, op2); 316 } else { 317 if (size) 318 return new Unknown(machInst); 319 else 320 return new BaseD<uint32_t>(machInst, dest, op1, op2); 321 } 322 } 323 324 template <template <typename T> class Base> 325 StaticInstPtr 326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, 327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) 328 { 329 if (size) 330 return new Base<uint64_t>(machInst, dest, op1, op2); 331 else 332 return new Base<uint32_t>(machInst, dest, op1, op2); 333 } 334 335 template <template <typename T> class Base> 336 StaticInstPtr 337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, 338 IntRegIndex dest, IntRegIndex op1, 339 IntRegIndex op2, uint64_t imm) 340 { 341 if (size) 342 return new Base<uint64_t>(machInst, dest, op1, op2, imm); 343 else 344 return new Base<uint32_t>(machInst, dest, op1, op2, imm); 345 } 346 347 template <template <typename T> class BaseD, 348 template <typename T> class BaseQ> 349 StaticInstPtr 350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, 351 IntRegIndex dest, IntRegIndex op1, 352 IntRegIndex op2, uint64_t imm) 353 { 354 if (q) { 355 switch (size) { 356 case 1: 357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); 358 case 2: 359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); 360 default: 361 return new Unknown(machInst); 362 } 363 } else { 364 switch (size) { 365 case 1: 366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); 367 case 2: 368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); 369 default: 370 return new Unknown(machInst); 371 } 372 } 373 } 374 375 template <template <typename T> class BaseD, 376 template <typename T> class 
BaseQ> 377 StaticInstPtr 378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, 379 IntRegIndex dest, IntRegIndex op1, 380 IntRegIndex op2, uint64_t imm) 381 { 382 if (q) { 383 switch (size) { 384 case 1: 385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); 386 case 2: 387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); 388 default: 389 return new Unknown(machInst); 390 } 391 } else { 392 switch (size) { 393 case 1: 394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm); 395 case 2: 396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm); 397 default: 398 return new Unknown(machInst); 399 } 400 } 401 } 402 403 template <template <typename T> class BaseD, 404 template <typename T> class BaseQ> 405 StaticInstPtr 406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, 407 IntRegIndex dest, IntRegIndex op1, 408 IntRegIndex op2, uint64_t imm) 409 { 410 if (q) { 411 if (size) 412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); 413 else 414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); 415 } else { 416 if (size) 417 return new Unknown(machInst); 418 else 419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); 420 } 421 } 422 423 template <template <typename T> class BaseD, 424 template <typename T> class BaseQ> 425 StaticInstPtr 426 decodeNeonUTwoShiftReg(bool q, unsigned size, 427 ExtMachInst machInst, IntRegIndex dest, 428 IntRegIndex op1, uint64_t imm) 429 { 430 if (q) { 431 switch (size) { 432 case 0: 433 return new BaseQ<uint8_t>(machInst, dest, op1, imm); 434 case 1: 435 return new BaseQ<uint16_t>(machInst, dest, op1, imm); 436 case 2: 437 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 438 case 3: 439 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 440 default: 441 return new Unknown(machInst); 442 } 443 } else { 444 switch (size) { 445 case 0: 446 return new BaseD<uint8_t>(machInst, dest, op1, imm); 447 case 1: 448 return new BaseD<uint16_t>(machInst, 
dest, op1, imm); 449 case 2: 450 return new BaseD<uint32_t>(machInst, dest, op1, imm); 451 case 3: 452 return new BaseD<uint64_t>(machInst, dest, op1, imm); 453 default: 454 return new Unknown(machInst); 455 } 456 } 457 } 458 459 template <template <typename T> class BaseD, 460 template <typename T> class BaseQ> 461 StaticInstPtr 462 decodeNeonSTwoShiftReg(bool q, unsigned size, 463 ExtMachInst machInst, IntRegIndex dest, 464 IntRegIndex op1, uint64_t imm) 465 { 466 if (q) { 467 switch (size) { 468 case 0: 469 return new BaseQ<int8_t>(machInst, dest, op1, imm); 470 case 1: 471 return new BaseQ<int16_t>(machInst, dest, op1, imm); 472 case 2: 473 return new BaseQ<int32_t>(machInst, dest, op1, imm); 474 case 3: 475 return new BaseQ<int64_t>(machInst, dest, op1, imm); 476 default: 477 return new Unknown(machInst); 478 } 479 } else { 480 switch (size) { 481 case 0: 482 return new BaseD<int8_t>(machInst, dest, op1, imm); 483 case 1: 484 return new BaseD<int16_t>(machInst, dest, op1, imm); 485 case 2: 486 return new BaseD<int32_t>(machInst, dest, op1, imm); 487 case 3: 488 return new BaseD<int64_t>(machInst, dest, op1, imm); 489 default: 490 return new Unknown(machInst); 491 } 492 } 493 } 494 495 496 template <template <typename T> class BaseD, 497 template <typename T> class BaseQ> 498 StaticInstPtr 499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size, 500 ExtMachInst machInst, IntRegIndex dest, 501 IntRegIndex op1, uint64_t imm) 502 { 503 if (notSigned) { 504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>( 505 q, size, machInst, dest, op1, imm); 506 } else { 507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>( 508 q, size, machInst, dest, op1, imm); 509 } 510 } 511 512 template <template <typename T> class Base> 513 StaticInstPtr 514 decodeNeonUTwoShiftUSReg(unsigned size, 515 ExtMachInst machInst, IntRegIndex dest, 516 IntRegIndex op1, uint64_t imm) 517 { 518 switch (size) { 519 case 0: 520 return new Base<uint8_t>(machInst, dest, op1, imm); 521 case 1: 522 
return new Base<uint16_t>(machInst, dest, op1, imm); 523 case 2: 524 return new Base<uint32_t>(machInst, dest, op1, imm); 525 default: 526 return new Unknown(machInst); 527 } 528 } 529 530 template <template <typename T> class Base> 531 StaticInstPtr 532 decodeNeonUTwoShiftUReg(unsigned size, 533 ExtMachInst machInst, IntRegIndex dest, 534 IntRegIndex op1, uint64_t imm) 535 { 536 switch (size) { 537 case 0: 538 return new Base<uint8_t>(machInst, dest, op1, imm); 539 case 1: 540 return new Base<uint16_t>(machInst, dest, op1, imm); 541 case 2: 542 return new Base<uint32_t>(machInst, dest, op1, imm); 543 case 3: 544 return new Base<uint64_t>(machInst, dest, op1, imm); 545 default: 546 return new Unknown(machInst); 547 } 548 } 549 550 template <template <typename T> class Base> 551 StaticInstPtr 552 decodeNeonSTwoShiftUReg(unsigned size, 553 ExtMachInst machInst, IntRegIndex dest, 554 IntRegIndex op1, uint64_t imm) 555 { 556 switch (size) { 557 case 0: 558 return new Base<int8_t>(machInst, dest, op1, imm); 559 case 1: 560 return new Base<int16_t>(machInst, dest, op1, imm); 561 case 2: 562 return new Base<int32_t>(machInst, dest, op1, imm); 563 case 3: 564 return new Base<int64_t>(machInst, dest, op1, imm); 565 default: 566 return new Unknown(machInst); 567 } 568 } 569 570 template <template <typename T> class BaseD, 571 template <typename T> class BaseQ> 572 StaticInstPtr 573 decodeNeonUTwoShiftSReg(bool q, unsigned size, 574 ExtMachInst machInst, IntRegIndex dest, 575 IntRegIndex op1, uint64_t imm) 576 { 577 if (q) { 578 return decodeNeonUTwoShiftUSReg<BaseQ>( 579 size, machInst, dest, op1, imm); 580 } else { 581 return decodeNeonUTwoShiftUSReg<BaseD>( 582 size, machInst, dest, op1, imm); 583 } 584 } 585 586 template <template <typename T> class Base> 587 StaticInstPtr 588 decodeNeonSTwoShiftUSReg(unsigned size, 589 ExtMachInst machInst, IntRegIndex dest, 590 IntRegIndex op1, uint64_t imm) 591 { 592 switch (size) { 593 case 0: 594 return new Base<int8_t>(machInst, 
dest, op1, imm); 595 case 1: 596 return new Base<int16_t>(machInst, dest, op1, imm); 597 case 2: 598 return new Base<int32_t>(machInst, dest, op1, imm); 599 default: 600 return new Unknown(machInst); 601 } 602 } 603 604 template <template <typename T> class BaseD, 605 template <typename T> class BaseQ> 606 StaticInstPtr 607 decodeNeonSTwoShiftSReg(bool q, unsigned size, 608 ExtMachInst machInst, IntRegIndex dest, 609 IntRegIndex op1, uint64_t imm) 610 { 611 if (q) { 612 return decodeNeonSTwoShiftUSReg<BaseQ>( 613 size, machInst, dest, op1, imm); 614 } else { 615 return decodeNeonSTwoShiftUSReg<BaseD>( 616 size, machInst, dest, op1, imm); 617 } 618 } 619 620 template <template <typename T> class BaseD, 621 template <typename T> class BaseQ> 622 StaticInstPtr 623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size, 624 ExtMachInst machInst, IntRegIndex dest, 625 IntRegIndex op1, uint64_t imm) 626 { 627 if (notSigned) { 628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 629 q, size, machInst, dest, op1, imm); 630 } else { 631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 632 q, size, machInst, dest, op1, imm); 633 } 634 } 635 636 template <template <typename T> class BaseD, 637 template <typename T> class BaseQ> 638 StaticInstPtr 639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, 640 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 641 { 642 if (q) { 643 return decodeNeonUTwoShiftUReg<BaseQ>( 644 size, machInst, dest, op1, imm); 645 } else { 646 return decodeNeonUTwoShiftUSReg<BaseD>( 647 size, machInst, dest, op1, imm); 648 } 649 } 650 651 template <template <typename T> class BaseD, 652 template <typename T> class BaseQ> 653 StaticInstPtr 654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, 655 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 656 { 657 if (q) { 658 return decodeNeonSTwoShiftUReg<BaseQ>( 659 size, machInst, dest, op1, imm); 660 } else { 661 return decodeNeonSTwoShiftUSReg<BaseD>( 662 size, machInst, 
dest, op1, imm); 663 } 664 } 665 666 template <template <typename T> class Base> 667 StaticInstPtr 668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, 669 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 670 { 671 if (size) 672 return new Base<uint64_t>(machInst, dest, op1, imm); 673 else 674 return new Base<uint32_t>(machInst, dest, op1, imm); 675 } 676 677 template <template <typename T> class BaseD, 678 template <typename T> class BaseQ> 679 StaticInstPtr 680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, 681 IntRegIndex dest, IntRegIndex op1, uint64_t imm) 682 { 683 if (q) { 684 if (size) 685 return new BaseQ<uint64_t>(machInst, dest, op1, imm); 686 else 687 return new BaseQ<uint32_t>(machInst, dest, op1, imm); 688 } else { 689 if (size) 690 return new Unknown(machInst); 691 else 692 return new BaseD<uint32_t>(machInst, dest, op1, imm); 693 } 694 } 695 696 template <template <typename T> class Base> 697 StaticInstPtr 698 decodeNeonUTwoMiscUSReg(unsigned size, 699 ExtMachInst machInst, IntRegIndex dest, 700 IntRegIndex op1) 701 { 702 switch (size) { 703 case 0: 704 return new Base<uint8_t>(machInst, dest, op1); 705 case 1: 706 return new Base<uint16_t>(machInst, dest, op1); 707 case 2: 708 return new Base<uint32_t>(machInst, dest, op1); 709 default: 710 return new Unknown(machInst); 711 } 712 } 713 714 template <template <typename T> class Base> 715 StaticInstPtr 716 decodeNeonSTwoMiscUSReg(unsigned size, 717 ExtMachInst machInst, IntRegIndex dest, 718 IntRegIndex op1) 719 { 720 switch (size) { 721 case 0: 722 return new Base<int8_t>(machInst, dest, op1); 723 case 1: 724 return new Base<int16_t>(machInst, dest, op1); 725 case 2: 726 return new Base<int32_t>(machInst, dest, op1); 727 default: 728 return new Unknown(machInst); 729 } 730 } 731 732 template <template <typename T> class BaseD, 733 template <typename T> class BaseQ> 734 StaticInstPtr 735 decodeNeonUTwoMiscSReg(bool q, unsigned size, 736 ExtMachInst machInst, 
IntRegIndex dest, 737 IntRegIndex op1) 738 { 739 if (q) { 740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 741 } else { 742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 743 } 744 } 745 746 template <template <typename T> class BaseD, 747 template <typename T> class BaseQ> 748 StaticInstPtr 749 decodeNeonSTwoMiscSReg(bool q, unsigned size, 750 ExtMachInst machInst, IntRegIndex dest, 751 IntRegIndex op1) 752 { 753 if (q) { 754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); 755 } else { 756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 757 } 758 } 759 760 template <template <typename T> class Base> 761 StaticInstPtr 762 decodeNeonUTwoMiscUReg(unsigned size, 763 ExtMachInst machInst, IntRegIndex dest, 764 IntRegIndex op1) 765 { 766 switch (size) { 767 case 0: 768 return new Base<uint8_t>(machInst, dest, op1); 769 case 1: 770 return new Base<uint16_t>(machInst, dest, op1); 771 case 2: 772 return new Base<uint32_t>(machInst, dest, op1); 773 case 3: 774 return new Base<uint64_t>(machInst, dest, op1); 775 default: 776 return new Unknown(machInst); 777 } 778 } 779 780 template <template <typename T> class Base> 781 StaticInstPtr 782 decodeNeonSTwoMiscUReg(unsigned size, 783 ExtMachInst machInst, IntRegIndex dest, 784 IntRegIndex op1) 785 { 786 switch (size) { 787 case 0: 788 return new Base<int8_t>(machInst, dest, op1); 789 case 1: 790 return new Base<int16_t>(machInst, dest, op1); 791 case 2: 792 return new Base<int32_t>(machInst, dest, op1); 793 case 3: 794 return new Base<int64_t>(machInst, dest, op1); 795 default: 796 return new Unknown(machInst); 797 } 798 } 799 800 template <template <typename T> class BaseD, 801 template <typename T> class BaseQ> 802 StaticInstPtr 803 decodeNeonSTwoMiscReg(bool q, unsigned size, 804 ExtMachInst machInst, IntRegIndex dest, 805 IntRegIndex op1) 806 { 807 if (q) { 808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 809 } else { 810 return 
decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1); 811 } 812 } 813 814 template <template <typename T> class BaseD, 815 template <typename T> class BaseQ> 816 StaticInstPtr 817 decodeNeonUTwoMiscReg(bool q, unsigned size, 818 ExtMachInst machInst, IntRegIndex dest, 819 IntRegIndex op1) 820 { 821 if (q) { 822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 823 } else { 824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1); 825 } 826 } 827 828 template <template <typename T> class BaseD, 829 template <typename T> class BaseQ> 830 StaticInstPtr 831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size, 832 ExtMachInst machInst, IntRegIndex dest, 833 IntRegIndex op1) 834 { 835 if (notSigned) { 836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>( 837 q, size, machInst, dest, op1); 838 } else { 839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>( 840 q, size, machInst, dest, op1); 841 } 842 } 843 844 template <template <typename T> class BaseD, 845 template <typename T> class BaseQ> 846 StaticInstPtr 847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, 848 IntRegIndex dest, IntRegIndex op1) 849 { 850 if (q) { 851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 852 } else { 853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 854 } 855 } 856 857 template <template <typename T> class BaseD, 858 template <typename T> class BaseQ> 859 StaticInstPtr 860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, 861 IntRegIndex dest, IntRegIndex op1) 862 { 863 if (q) { 864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); 865 } else { 866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); 867 } 868 } 869 870 template <template <typename T> class BaseD, 871 template <typename T> class BaseQ> 872 StaticInstPtr 873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, 874 IntRegIndex dest, IntRegIndex op1) 875 { 876 if (q) { 877 if 
(size) 878 return new BaseQ<uint64_t>(machInst, dest, op1); 879 else 880 return new BaseQ<uint32_t>(machInst, dest, op1); 881 } else { 882 if (size) 883 return new Unknown(machInst); 884 else 885 return new BaseD<uint32_t>(machInst, dest, op1); 886 } 887 } 888 889 template <template <typename T> class BaseD, 890 template <typename T> class BaseQ> 891 StaticInstPtr 892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, 893 IntRegIndex dest, IntRegIndex op1) 894 { 895 if (size) 896 return new BaseQ<uint64_t>(machInst, dest, op1); 897 else 898 return new BaseD<uint32_t>(machInst, dest, op1); 899 } 900 901 template <template <typename T> class Base> 902 StaticInstPtr 903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, 904 IntRegIndex dest, IntRegIndex op1) 905 { 906 if (size) 907 return new Base<uint64_t>(machInst, dest, op1); 908 else 909 return new Base<uint32_t>(machInst, dest, op1); 910 } 911 912 template <template <typename T> class BaseD, 913 template <typename T> class BaseQ> 914 StaticInstPtr 915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 916 IntRegIndex dest, IntRegIndex op1) 917 { 918 if (q) { 919 switch (size) { 920 case 0x0: 921 return new BaseQ<uint8_t>(machInst, dest, op1); 922 case 0x1: 923 return new BaseQ<uint16_t>(machInst, dest, op1); 924 case 0x2: 925 return new BaseQ<uint32_t>(machInst, dest, op1); 926 default: 927 return new Unknown(machInst); 928 } 929 } else { 930 switch (size) { 931 case 0x0: 932 return new BaseD<uint8_t>(machInst, dest, op1); 933 case 0x1: 934 return new BaseD<uint16_t>(machInst, dest, op1); 935 default: 936 return new Unknown(machInst); 937 } 938 } 939 } 940 941 template <template <typename T> class BaseD, 942 template <typename T> class BaseQ, 943 template <typename T> class BaseBQ> 944 StaticInstPtr 945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 946 IntRegIndex dest, IntRegIndex op1) 947 { 948 if (q) { 949 switch (size) { 950 case 0x0: 
951 return new BaseQ<uint8_t>(machInst, dest, op1); 952 case 0x1: 953 return new BaseQ<uint16_t>(machInst, dest, op1); 954 case 0x2: 955 return new BaseBQ<uint32_t>(machInst, dest, op1); 956 default: 957 return new Unknown(machInst); 958 } 959 } else { 960 switch (size) { 961 case 0x0: 962 return new BaseD<uint8_t>(machInst, dest, op1); 963 case 0x1: 964 return new BaseD<uint16_t>(machInst, dest, op1); 965 default: 966 return new Unknown(machInst); 967 } 968 } 969 } 970 971 template <template <typename T> class BaseD, 972 template <typename T> class BaseQ> 973 StaticInstPtr 974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, 975 IntRegIndex dest, IntRegIndex op1) 976 { 977 if (q) { 978 switch (size) { 979 case 0x0: 980 return new BaseQ<int8_t>(machInst, dest, op1); 981 case 0x1: 982 return new BaseQ<int16_t>(machInst, dest, op1); 983 case 0x2: 984 return new BaseQ<int32_t>(machInst, dest, op1); 985 default: 986 return new Unknown(machInst); 987 } 988 } else { 989 switch (size) { 990 case 0x0: 991 return new BaseD<int8_t>(machInst, dest, op1); 992 case 0x1: 993 return new BaseD<int16_t>(machInst, dest, op1); 994 default: 995 return new Unknown(machInst); 996 } 997 } 998 } 999 1000 template <template <typename T> class BaseD, 1001 template <typename T> class BaseQ, 1002 template <typename T> class BaseBQ> 1003 StaticInstPtr 1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, 1005 IntRegIndex dest, IntRegIndex op1) 1006 { 1007 if (q) { 1008 switch (size) { 1009 case 0x0: 1010 return new BaseQ<uint8_t>(machInst, dest, op1); 1011 case 0x1: 1012 return new BaseQ<uint16_t>(machInst, dest, op1); 1013 case 0x2: 1014 return new BaseBQ<uint32_t>(machInst, dest, op1); 1015 default: 1016 return new Unknown(machInst); 1017 } 1018 } else { 1019 switch (size) { 1020 case 0x0: 1021 return new BaseD<uint8_t>(machInst, dest, op1); 1022 case 0x1: 1023 return new BaseD<uint16_t>(machInst, dest, op1); 1024 default: 1025 return new 
Unknown(machInst); 1026 } 1027 } 1028 } 1029 1030 template <template <typename T> class BaseD, 1031 template <typename T> class BaseQ, 1032 template <typename T> class BaseBQ> 1033 StaticInstPtr 1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, 1035 IntRegIndex dest, IntRegIndex op1) 1036 { 1037 if (q) { 1038 switch (size) { 1039 case 0x0: 1040 return new BaseQ<int8_t>(machInst, dest, op1); 1041 case 0x1: 1042 return new BaseQ<int16_t>(machInst, dest, op1); 1043 case 0x2: 1044 return new BaseBQ<int32_t>(machInst, dest, op1); 1045 default: 1046 return new Unknown(machInst); 1047 } 1048 } else { 1049 switch (size) { 1050 case 0x0: 1051 return new BaseD<int8_t>(machInst, dest, op1); 1052 case 0x1: 1053 return new BaseD<int16_t>(machInst, dest, op1); 1054 default: 1055 return new Unknown(machInst); 1056 } 1057 } 1058 } 1059}}; 1060 1061let {{ 1062 header_output = "" 1063 exec_output = "" 1064 1065 vcompares = ''' 1066 static float 1067 vcgtFunc(float op1, float op2) 1068 { 1069 if (std::isnan(op1) || std::isnan(op2)) 1070 return 2.0; 1071 return (op1 > op2) ? 0.0 : 1.0; 1072 } 1073 1074 static float 1075 vcgeFunc(float op1, float op2) 1076 { 1077 if (std::isnan(op1) || std::isnan(op2)) 1078 return 2.0; 1079 return (op1 >= op2) ? 0.0 : 1.0; 1080 } 1081 1082 static float 1083 vceqFunc(float op1, float op2) 1084 { 1085 if (isSnan(op1) || isSnan(op2)) 1086 return 2.0; 1087 return (op1 == op2) ? 0.0 : 1.0; 1088 } 1089''' 1090 vcomparesL = ''' 1091 static float 1092 vcleFunc(float op1, float op2) 1093 { 1094 if (std::isnan(op1) || std::isnan(op2)) 1095 return 2.0; 1096 return (op1 <= op2) ? 0.0 : 1.0; 1097 } 1098 1099 static float 1100 vcltFunc(float op1, float op2) 1101 { 1102 if (std::isnan(op1) || std::isnan(op2)) 1103 return 2.0; 1104 return (op1 < op2) ? 
0.0 : 1.0;
    }
'''
    # Absolute-value comparison helpers (compare fabsf(op1) with
    # fabsf(op2)); same 0.0 / 1.0 / 2.0 return convention as vcompares.
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    # Only vcompares and vacomparesG are appended to exec_output here;
    # vcomparesL is not used within this excerpt.
    exec_output += vcompares + vacomparesG

    # Element-type name tuples handed to the generator functions as the
    # `types` argument.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes

    # Generate a three-operand instruction class whose sources and
    # destination all span rCount registers.
    #
    #   name, Name    - passed straight to InstObjParams; Name is also used
    #                   as "class_name" for the per-type declarations.
    #   opClass       - stored as "op_class" in the InstObjParams dict.
    #   types         - element type names; one NeonExecDeclare
    #                   substitution is emitted per entry.
    #   rCount        - number of registers read per operand and written
    #                   for the destination.
    #   op            - C++ snippet spliced into the per-element loop; it
    #                   sees srcElem1, srcElem2 and destElem.
    #   readDest      - also load the destination registers up front and
    #                   pre-set destElem from them in each iteration.
    #   pairwise      - take srcElem1/srcElem2 from adjacent elements
    #                   (2*i, 2*i+1) of srcReg1 followed by srcReg2, rather
    #                   than element i of each source.
    #   standardFpcsr - wrap the loop with a local fpscr obtained from
    #                   fpStandardFPSCRValue and write it back to FpscrExc.
    #
    # Appends to the global header_output and exec_output strings.
    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                          readDest=False, pairwise=False,
                          standardFpcsr=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        # Load the source (and optionally destination) registers.
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if standardFpcsr:
            eWalkCode += '''
            FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
            '''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        if pairwise:
            # For 2*i < eCount the pair comes from srcReg1, otherwise from
            # srcReg2 at the index reduced by eCount.
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(srcReg1.elements[i]);
                Element srcElem2 = gtoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        if standardFpcsr:
            eWalkCode += '''
            FpscrExc = fpscr;
            '''
        # Write the result registers back.
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)

    # NOTE(review): this definition continues past the end of the visible
    # excerpt; only its opening (operand loading) is shown here.
    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
readDestCode = '' 1230 if readDest: 1231 readDestCode = 'destReg = destRegs[r];' 1232 destType = 'float' 1233 writeDest = 'destRegs[r] = destReg;' 1234 if toInt: 1235 destType = 'uint32_t' 1236 writeDest = 'destRegs.regs[r] = destReg;' 1237 if pairwise: 1238 eWalkCode += ''' 1239 for (unsigned r = 0; r < rCount; r++) { 1240 float srcReg1 = (2 * r < rCount) ? 1241 srcRegs1[2 * r] : srcRegs2[2 * r - rCount]; 1242 float srcReg2 = (2 * r < rCount) ? 1243 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount]; 1244 %(destType)s destReg; 1245 %(readDest)s 1246 %(op)s 1247 %(writeDest)s 1248 } 1249 ''' % { "op" : op, 1250 "readDest" : readDestCode, 1251 "destType" : destType, 1252 "writeDest" : writeDest } 1253 else: 1254 eWalkCode += ''' 1255 for (unsigned r = 0; r < rCount; r++) { 1256 float srcReg1 = srcRegs1[r]; 1257 float srcReg2 = srcRegs2[r]; 1258 %(destType)s destReg; 1259 %(readDest)s 1260 %(op)s 1261 %(writeDest)s 1262 } 1263 ''' % { "op" : op, 1264 "readDest" : readDestCode, 1265 "destType" : destType, 1266 "writeDest" : writeDest } 1267 for reg in range(rCount): 1268 if toInt: 1269 eWalkCode += ''' 1270 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 1271 ''' % { "reg" : reg } 1272 else: 1273 eWalkCode += ''' 1274 FpDestP%(reg)d = destRegs[%(reg)d]; 1275 ''' % { "reg" : reg } 1276 iop = InstObjParams(name, Name, 1277 "FpRegRegRegOp", 1278 { "code": eWalkCode, 1279 "r_count": rCount, 1280 "predicate_test": predicateTest, 1281 "op_class": opClass }, []) 1282 header_output += NeonRegRegRegOpDeclare.subst(iop) 1283 exec_output += NeonEqualRegExecute.subst(iop) 1284 for type in types: 1285 substDict = { "targs" : type, 1286 "class_name" : Name } 1287 exec_output += NeonExecDeclare.subst(substDict) 1288 1289 def threeUnequalRegInst(name, Name, opClass, types, op, 1290 bigSrc1, bigSrc2, bigDest, readDest): 1291 global header_output, exec_output 1292 src1Cnt = src2Cnt = destCnt = 2 1293 src1Prefix = src2Prefix = destPrefix = '' 1294 if bigSrc1: 1295 src1Cnt = 4 1296 
src1Prefix = 'Big' 1297 if bigSrc2: 1298 src2Cnt = 4 1299 src2Prefix = 'Big' 1300 if bigDest: 1301 destCnt = 4 1302 destPrefix = 'Big' 1303 eWalkCode = simdEnabledCheckCode + ''' 1304 %sRegVect srcReg1; 1305 %sRegVect srcReg2; 1306 %sRegVect destReg; 1307 ''' % (src1Prefix, src2Prefix, destPrefix) 1308 for reg in range(src1Cnt): 1309 eWalkCode += ''' 1310 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1311 ''' % { "reg" : reg } 1312 for reg in range(src2Cnt): 1313 eWalkCode += ''' 1314 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 1315 ''' % { "reg" : reg } 1316 if readDest: 1317 for reg in range(destCnt): 1318 eWalkCode += ''' 1319 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1320 ''' % { "reg" : reg } 1321 readDestCode = '' 1322 if readDest: 1323 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1324 eWalkCode += ''' 1325 for (unsigned i = 0; i < eCount; i++) { 1326 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 1327 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]); 1328 %(destPrefix)sElement destElem; 1329 %(readDest)s 1330 %(op)s 1331 destReg.elements[i] = htog(destElem); 1332 } 1333 ''' % { "op" : op, "readDest" : readDestCode, 1334 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 1335 "destPrefix" : destPrefix } 1336 for reg in range(destCnt): 1337 eWalkCode += ''' 1338 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1339 ''' % { "reg" : reg } 1340 iop = InstObjParams(name, Name, 1341 "RegRegRegOp", 1342 { "code": eWalkCode, 1343 "r_count": 2, 1344 "predicate_test": predicateTest, 1345 "op_class": opClass }, []) 1346 header_output += NeonRegRegRegOpDeclare.subst(iop) 1347 exec_output += NeonUnequalRegExecute.subst(iop) 1348 for type in types: 1349 substDict = { "targs" : type, 1350 "class_name" : Name } 1351 exec_output += NeonExecDeclare.subst(substDict) 1352 1353 def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False): 1354 threeUnequalRegInst(name, Name, opClass, types, op, 1355 True, True, False, 
readDest) 1356 1357 def threeRegLongInst(name, Name, opClass, types, op, readDest=False): 1358 threeUnequalRegInst(name, Name, opClass, types, op, 1359 False, False, True, readDest) 1360 1361 def threeRegWideInst(name, Name, opClass, types, op, readDest=False): 1362 threeUnequalRegInst(name, Name, opClass, types, op, 1363 True, False, True, readDest) 1364 1365 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False): 1366 global header_output, exec_output 1367 eWalkCode = simdEnabledCheckCode + ''' 1368 RegVect srcReg1, srcReg2, destReg; 1369 ''' 1370 for reg in range(rCount): 1371 eWalkCode += ''' 1372 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1373 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 1374 ''' % { "reg" : reg } 1375 if readDest: 1376 eWalkCode += ''' 1377 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1378 ''' % { "reg" : reg } 1379 readDestCode = '' 1380 if readDest: 1381 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1382 eWalkCode += ''' 1383 if (imm < 0 && imm >= eCount) { 1384 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1385 mnemonic); 1386 } else { 1387 for (unsigned i = 0; i < eCount; i++) { 1388 Element srcElem1 = gtoh(srcReg1.elements[i]); 1389 Element srcElem2 = gtoh(srcReg2.elements[imm]); 1390 Element destElem; 1391 %(readDest)s 1392 %(op)s 1393 destReg.elements[i] = htog(destElem); 1394 } 1395 } 1396 ''' % { "op" : op, "readDest" : readDestCode } 1397 for reg in range(rCount): 1398 eWalkCode += ''' 1399 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1400 ''' % { "reg" : reg } 1401 iop = InstObjParams(name, Name, 1402 "RegRegRegImmOp", 1403 { "code": eWalkCode, 1404 "r_count": rCount, 1405 "predicate_test": predicateTest, 1406 "op_class": opClass }, []) 1407 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1408 exec_output += NeonEqualRegExecute.subst(iop) 1409 for type in types: 1410 substDict = { "targs" : type, 1411 "class_name" : Name } 1412 exec_output += 
NeonExecDeclare.subst(substDict) 1413 1414 def twoRegLongInst(name, Name, opClass, types, op, readDest=False): 1415 global header_output, exec_output 1416 rCount = 2 1417 eWalkCode = simdEnabledCheckCode + ''' 1418 RegVect srcReg1, srcReg2; 1419 BigRegVect destReg; 1420 ''' 1421 for reg in range(rCount): 1422 eWalkCode += ''' 1423 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1424 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);; 1425 ''' % { "reg" : reg } 1426 if readDest: 1427 for reg in range(2 * rCount): 1428 eWalkCode += ''' 1429 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1430 ''' % { "reg" : reg } 1431 readDestCode = '' 1432 if readDest: 1433 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1434 eWalkCode += ''' 1435 if (imm < 0 && imm >= eCount) { 1436 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1437 mnemonic); 1438 } else { 1439 for (unsigned i = 0; i < eCount; i++) { 1440 Element srcElem1 = gtoh(srcReg1.elements[i]); 1441 Element srcElem2 = gtoh(srcReg2.elements[imm]); 1442 BigElement destElem; 1443 %(readDest)s 1444 %(op)s 1445 destReg.elements[i] = htog(destElem); 1446 } 1447 } 1448 ''' % { "op" : op, "readDest" : readDestCode } 1449 for reg in range(2 * rCount): 1450 eWalkCode += ''' 1451 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1452 ''' % { "reg" : reg } 1453 iop = InstObjParams(name, Name, 1454 "RegRegRegImmOp", 1455 { "code": eWalkCode, 1456 "r_count": rCount, 1457 "predicate_test": predicateTest, 1458 "op_class": opClass }, []) 1459 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1460 exec_output += NeonUnequalRegExecute.subst(iop) 1461 for type in types: 1462 substDict = { "targs" : type, 1463 "class_name" : Name } 1464 exec_output += NeonExecDeclare.subst(substDict) 1465 1466 def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False): 1467 global header_output, exec_output 1468 eWalkCode = simdEnabledCheckCode + ''' 1469 typedef float FloatVect[rCount]; 1470 FloatVect srcRegs1, 
srcRegs2, destRegs; 1471 ''' 1472 for reg in range(rCount): 1473 eWalkCode += ''' 1474 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1475 srcRegs2[%(reg)d] = FpOp2P%(reg)d; 1476 ''' % { "reg" : reg } 1477 if readDest: 1478 eWalkCode += ''' 1479 destRegs[%(reg)d] = FpDestP%(reg)d; 1480 ''' % { "reg" : reg } 1481 readDestCode = '' 1482 if readDest: 1483 readDestCode = 'destReg = destRegs[i];' 1484 eWalkCode += ''' 1485 if (imm < 0 && imm >= eCount) { 1486 fault = std::make_shared<UndefinedInstruction>(machInst, false, 1487 mnemonic); 1488 } else { 1489 for (unsigned i = 0; i < rCount; i++) { 1490 float srcReg1 = srcRegs1[i]; 1491 float srcReg2 = srcRegs2[imm]; 1492 float destReg; 1493 %(readDest)s 1494 %(op)s 1495 destRegs[i] = destReg; 1496 } 1497 } 1498 ''' % { "op" : op, "readDest" : readDestCode } 1499 for reg in range(rCount): 1500 eWalkCode += ''' 1501 FpDestP%(reg)d = destRegs[%(reg)d]; 1502 ''' % { "reg" : reg } 1503 iop = InstObjParams(name, Name, 1504 "FpRegRegRegImmOp", 1505 { "code": eWalkCode, 1506 "r_count": rCount, 1507 "predicate_test": predicateTest, 1508 "op_class": opClass }, []) 1509 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 1510 exec_output += NeonEqualRegExecute.subst(iop) 1511 for type in types: 1512 substDict = { "targs" : type, 1513 "class_name" : Name } 1514 exec_output += NeonExecDeclare.subst(substDict) 1515 1516 def twoRegShiftInst(name, Name, opClass, types, rCount, op, 1517 readDest=False, toInt=False, fromInt=False): 1518 global header_output, exec_output 1519 eWalkCode = simdEnabledCheckCode + ''' 1520 RegVect srcRegs1, destRegs; 1521 ''' 1522 for reg in range(rCount): 1523 eWalkCode += ''' 1524 srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1525 ''' % { "reg" : reg } 1526 if readDest: 1527 eWalkCode += ''' 1528 destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1529 ''' % { "reg" : reg } 1530 readDestCode = '' 1531 if readDest: 1532 readDestCode = 'destElem = gtoh(destRegs.elements[i]);' 1533 if toInt: 1534 readDestCode = 'destReg 
= gtoh(destRegs.regs[i]);' 1535 readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);' 1536 if fromInt: 1537 readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);' 1538 declDest = 'Element destElem;' 1539 writeDestCode = 'destRegs.elements[i] = htog(destElem);' 1540 if toInt: 1541 declDest = 'uint32_t destReg;' 1542 writeDestCode = 'destRegs.regs[i] = htog(destReg);' 1543 eWalkCode += ''' 1544 for (unsigned i = 0; i < eCount; i++) { 1545 %(readOp)s 1546 %(declDest)s 1547 %(readDest)s 1548 %(op)s 1549 %(writeDest)s 1550 } 1551 ''' % { "readOp" : readOpCode, 1552 "declDest" : declDest, 1553 "readDest" : readDestCode, 1554 "op" : op, 1555 "writeDest" : writeDestCode } 1556 for reg in range(rCount): 1557 eWalkCode += ''' 1558 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]); 1559 ''' % { "reg" : reg } 1560 iop = InstObjParams(name, Name, 1561 "RegRegImmOp", 1562 { "code": eWalkCode, 1563 "r_count": rCount, 1564 "predicate_test": predicateTest, 1565 "op_class": opClass }, []) 1566 header_output += NeonRegRegImmOpDeclare.subst(iop) 1567 exec_output += NeonEqualRegExecute.subst(iop) 1568 for type in types: 1569 substDict = { "targs" : type, 1570 "class_name" : Name } 1571 exec_output += NeonExecDeclare.subst(substDict) 1572 1573 def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False): 1574 global header_output, exec_output 1575 eWalkCode = simdEnabledCheckCode + ''' 1576 BigRegVect srcReg1; 1577 RegVect destReg; 1578 ''' 1579 for reg in range(4): 1580 eWalkCode += ''' 1581 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1582 ''' % { "reg" : reg } 1583 if readDest: 1584 for reg in range(2): 1585 eWalkCode += ''' 1586 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1587 ''' % { "reg" : reg } 1588 readDestCode = '' 1589 if readDest: 1590 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1591 eWalkCode += ''' 1592 for (unsigned i = 0; i < eCount; i++) { 1593 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1594 Element destElem; 1595 
%(readDest)s 1596 %(op)s 1597 destReg.elements[i] = htog(destElem); 1598 } 1599 ''' % { "op" : op, "readDest" : readDestCode } 1600 for reg in range(2): 1601 eWalkCode += ''' 1602 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1603 ''' % { "reg" : reg } 1604 iop = InstObjParams(name, Name, 1605 "RegRegImmOp", 1606 { "code": eWalkCode, 1607 "r_count": 2, 1608 "predicate_test": predicateTest, 1609 "op_class": opClass }, []) 1610 header_output += NeonRegRegImmOpDeclare.subst(iop) 1611 exec_output += NeonUnequalRegExecute.subst(iop) 1612 for type in types: 1613 substDict = { "targs" : type, 1614 "class_name" : Name } 1615 exec_output += NeonExecDeclare.subst(substDict) 1616 1617 def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False): 1618 global header_output, exec_output 1619 eWalkCode = simdEnabledCheckCode + ''' 1620 RegVect srcReg1; 1621 BigRegVect destReg; 1622 ''' 1623 for reg in range(2): 1624 eWalkCode += ''' 1625 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1626 ''' % { "reg" : reg } 1627 if readDest: 1628 for reg in range(4): 1629 eWalkCode += ''' 1630 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1631 ''' % { "reg" : reg } 1632 readDestCode = '' 1633 if readDest: 1634 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1635 eWalkCode += ''' 1636 for (unsigned i = 0; i < eCount; i++) { 1637 Element srcElem1 = gtoh(srcReg1.elements[i]); 1638 BigElement destElem; 1639 %(readDest)s 1640 %(op)s 1641 destReg.elements[i] = htog(destElem); 1642 } 1643 ''' % { "op" : op, "readDest" : readDestCode } 1644 for reg in range(4): 1645 eWalkCode += ''' 1646 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1647 ''' % { "reg" : reg } 1648 iop = InstObjParams(name, Name, 1649 "RegRegImmOp", 1650 { "code": eWalkCode, 1651 "r_count": 2, 1652 "predicate_test": predicateTest, 1653 "op_class": opClass }, []) 1654 header_output += NeonRegRegImmOpDeclare.subst(iop) 1655 exec_output += NeonUnequalRegExecute.subst(iop) 1656 for type in types: 1657 substDict = 
{ "targs" : type, 1658 "class_name" : Name } 1659 exec_output += NeonExecDeclare.subst(substDict) 1660 1661 def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False): 1662 global header_output, exec_output 1663 eWalkCode = simdEnabledCheckCode + ''' 1664 RegVect srcReg1, destReg; 1665 ''' 1666 for reg in range(rCount): 1667 eWalkCode += ''' 1668 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1669 ''' % { "reg" : reg } 1670 if readDest: 1671 eWalkCode += ''' 1672 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1673 ''' % { "reg" : reg } 1674 readDestCode = '' 1675 if readDest: 1676 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1677 eWalkCode += ''' 1678 for (unsigned i = 0; i < eCount; i++) { 1679 unsigned j = i; 1680 Element srcElem1 = gtoh(srcReg1.elements[i]); 1681 Element destElem; 1682 %(readDest)s 1683 %(op)s 1684 destReg.elements[j] = htog(destElem); 1685 } 1686 ''' % { "op" : op, "readDest" : readDestCode } 1687 for reg in range(rCount): 1688 eWalkCode += ''' 1689 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1690 ''' % { "reg" : reg } 1691 iop = InstObjParams(name, Name, 1692 "RegRegOp", 1693 { "code": eWalkCode, 1694 "r_count": rCount, 1695 "predicate_test": predicateTest, 1696 "op_class": opClass }, []) 1697 header_output += NeonRegRegOpDeclare.subst(iop) 1698 exec_output += NeonEqualRegExecute.subst(iop) 1699 for type in types: 1700 substDict = { "targs" : type, 1701 "class_name" : Name } 1702 exec_output += NeonExecDeclare.subst(substDict) 1703 1704 def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False): 1705 global header_output, exec_output 1706 eWalkCode = simdEnabledCheckCode + ''' 1707 RegVect srcReg1, destReg; 1708 ''' 1709 for reg in range(rCount): 1710 eWalkCode += ''' 1711 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1712 ''' % { "reg" : reg } 1713 if readDest: 1714 eWalkCode += ''' 1715 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1716 ''' % { "reg" : reg } 1717 readDestCode = '' 1718 
if readDest: 1719 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1720 eWalkCode += ''' 1721 for (unsigned i = 0; i < eCount; i++) { 1722 Element srcElem1 = gtoh(srcReg1.elements[imm]); 1723 Element destElem; 1724 %(readDest)s 1725 %(op)s 1726 destReg.elements[i] = htog(destElem); 1727 } 1728 ''' % { "op" : op, "readDest" : readDestCode } 1729 for reg in range(rCount): 1730 eWalkCode += ''' 1731 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1732 ''' % { "reg" : reg } 1733 iop = InstObjParams(name, Name, 1734 "RegRegImmOp", 1735 { "code": eWalkCode, 1736 "r_count": rCount, 1737 "predicate_test": predicateTest, 1738 "op_class": opClass }, []) 1739 header_output += NeonRegRegImmOpDeclare.subst(iop) 1740 exec_output += NeonEqualRegExecute.subst(iop) 1741 for type in types: 1742 substDict = { "targs" : type, 1743 "class_name" : Name } 1744 exec_output += NeonExecDeclare.subst(substDict) 1745 1746 def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False): 1747 global header_output, exec_output 1748 eWalkCode = simdEnabledCheckCode + ''' 1749 RegVect srcReg1, destReg; 1750 ''' 1751 for reg in range(rCount): 1752 eWalkCode += ''' 1753 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1754 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1755 ''' % { "reg" : reg } 1756 if readDest: 1757 eWalkCode += ''' 1758 ''' % { "reg" : reg } 1759 readDestCode = '' 1760 if readDest: 1761 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1762 eWalkCode += op 1763 for reg in range(rCount): 1764 eWalkCode += ''' 1765 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1766 FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]); 1767 ''' % { "reg" : reg } 1768 iop = InstObjParams(name, Name, 1769 "RegRegOp", 1770 { "code": eWalkCode, 1771 "r_count": rCount, 1772 "predicate_test": predicateTest, 1773 "op_class": opClass }, []) 1774 header_output += NeonRegRegOpDeclare.subst(iop) 1775 exec_output += NeonEqualRegExecute.subst(iop) 1776 for type in types: 1777 substDict = 
{ "targs" : type, 1778 "class_name" : Name } 1779 exec_output += NeonExecDeclare.subst(substDict) 1780 1781 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op, 1782 readDest=False, toInt=False): 1783 global header_output, exec_output 1784 eWalkCode = simdEnabledCheckCode + ''' 1785 typedef float FloatVect[rCount]; 1786 FloatVect srcRegs1; 1787 ''' 1788 if toInt: 1789 eWalkCode += 'RegVect destRegs;\n' 1790 else: 1791 eWalkCode += 'FloatVect destRegs;\n' 1792 for reg in range(rCount): 1793 eWalkCode += ''' 1794 srcRegs1[%(reg)d] = FpOp1P%(reg)d; 1795 ''' % { "reg" : reg } 1796 if readDest: 1797 if toInt: 1798 eWalkCode += ''' 1799 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits; 1800 ''' % { "reg" : reg } 1801 else: 1802 eWalkCode += ''' 1803 destRegs[%(reg)d] = FpDestP%(reg)d; 1804 ''' % { "reg" : reg } 1805 readDestCode = '' 1806 if readDest: 1807 readDestCode = 'destReg = destRegs[i];' 1808 destType = 'float' 1809 writeDest = 'destRegs[r] = destReg;' 1810 if toInt: 1811 destType = 'uint32_t' 1812 writeDest = 'destRegs.regs[r] = destReg;' 1813 eWalkCode += ''' 1814 for (unsigned r = 0; r < rCount; r++) { 1815 float srcReg1 = srcRegs1[r]; 1816 %(destType)s destReg; 1817 %(readDest)s 1818 %(op)s 1819 %(writeDest)s 1820 } 1821 ''' % { "op" : op, 1822 "readDest" : readDestCode, 1823 "destType" : destType, 1824 "writeDest" : writeDest } 1825 for reg in range(rCount): 1826 if toInt: 1827 eWalkCode += ''' 1828 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d]; 1829 ''' % { "reg" : reg } 1830 else: 1831 eWalkCode += ''' 1832 FpDestP%(reg)d = destRegs[%(reg)d]; 1833 ''' % { "reg" : reg } 1834 iop = InstObjParams(name, Name, 1835 "FpRegRegOp", 1836 { "code": eWalkCode, 1837 "r_count": rCount, 1838 "predicate_test": predicateTest, 1839 "op_class": opClass }, []) 1840 header_output += NeonRegRegOpDeclare.subst(iop) 1841 exec_output += NeonEqualRegExecute.subst(iop) 1842 for type in types: 1843 substDict = { "targs" : type, 1844 "class_name" : Name } 1845 exec_output += 
NeonExecDeclare.subst(substDict) 1846 1847 def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False): 1848 global header_output, exec_output 1849 eWalkCode = simdEnabledCheckCode + ''' 1850 RegVect srcRegs; 1851 BigRegVect destReg; 1852 ''' 1853 for reg in range(rCount): 1854 eWalkCode += ''' 1855 srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1856 ''' % { "reg" : reg } 1857 if readDest: 1858 eWalkCode += ''' 1859 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1860 ''' % { "reg" : reg } 1861 readDestCode = '' 1862 if readDest: 1863 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1864 eWalkCode += ''' 1865 for (unsigned i = 0; i < eCount / 2; i++) { 1866 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 1867 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 1868 BigElement destElem; 1869 %(readDest)s 1870 %(op)s 1871 destReg.elements[i] = htog(destElem); 1872 } 1873 ''' % { "op" : op, "readDest" : readDestCode } 1874 for reg in range(rCount): 1875 eWalkCode += ''' 1876 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1877 ''' % { "reg" : reg } 1878 iop = InstObjParams(name, Name, 1879 "RegRegOp", 1880 { "code": eWalkCode, 1881 "r_count": rCount, 1882 "predicate_test": predicateTest, 1883 "op_class": opClass }, []) 1884 header_output += NeonRegRegOpDeclare.subst(iop) 1885 exec_output += NeonUnequalRegExecute.subst(iop) 1886 for type in types: 1887 substDict = { "targs" : type, 1888 "class_name" : Name } 1889 exec_output += NeonExecDeclare.subst(substDict) 1890 1891 def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False): 1892 global header_output, exec_output 1893 eWalkCode = simdEnabledCheckCode + ''' 1894 BigRegVect srcReg1; 1895 RegVect destReg; 1896 ''' 1897 for reg in range(4): 1898 eWalkCode += ''' 1899 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1900 ''' % { "reg" : reg } 1901 if readDest: 1902 for reg in range(2): 1903 eWalkCode += ''' 1904 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1905 ''' % { 
"reg" : reg } 1906 readDestCode = '' 1907 if readDest: 1908 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1909 eWalkCode += ''' 1910 for (unsigned i = 0; i < eCount; i++) { 1911 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 1912 Element destElem; 1913 %(readDest)s 1914 %(op)s 1915 destReg.elements[i] = htog(destElem); 1916 } 1917 ''' % { "op" : op, "readDest" : readDestCode } 1918 for reg in range(2): 1919 eWalkCode += ''' 1920 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1921 ''' % { "reg" : reg } 1922 iop = InstObjParams(name, Name, 1923 "RegRegOp", 1924 { "code": eWalkCode, 1925 "r_count": 2, 1926 "predicate_test": predicateTest, 1927 "op_class": opClass }, []) 1928 header_output += NeonRegRegOpDeclare.subst(iop) 1929 exec_output += NeonUnequalRegExecute.subst(iop) 1930 for type in types: 1931 substDict = { "targs" : type, 1932 "class_name" : Name } 1933 exec_output += NeonExecDeclare.subst(substDict) 1934 1935 def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False): 1936 global header_output, exec_output 1937 eWalkCode = simdEnabledCheckCode + ''' 1938 RegVect destReg; 1939 ''' 1940 if readDest: 1941 for reg in range(rCount): 1942 eWalkCode += ''' 1943 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1944 ''' % { "reg" : reg } 1945 readDestCode = '' 1946 if readDest: 1947 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 1948 eWalkCode += ''' 1949 for (unsigned i = 0; i < eCount; i++) { 1950 Element destElem; 1951 %(readDest)s 1952 %(op)s 1953 destReg.elements[i] = htog(destElem); 1954 } 1955 ''' % { "op" : op, "readDest" : readDestCode } 1956 for reg in range(rCount): 1957 eWalkCode += ''' 1958 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 1959 ''' % { "reg" : reg } 1960 iop = InstObjParams(name, Name, 1961 "RegImmOp", 1962 { "code": eWalkCode, 1963 "r_count": rCount, 1964 "predicate_test": predicateTest, 1965 "op_class": opClass }, []) 1966 header_output += NeonRegImmOpDeclare.subst(iop) 1967 exec_output += 
NeonEqualRegExecute.subst(iop) 1968 for type in types: 1969 substDict = { "targs" : type, 1970 "class_name" : Name } 1971 exec_output += NeonExecDeclare.subst(substDict) 1972 1973 def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False): 1974 global header_output, exec_output 1975 eWalkCode = simdEnabledCheckCode + ''' 1976 RegVect srcReg1; 1977 BigRegVect destReg; 1978 ''' 1979 for reg in range(2): 1980 eWalkCode += ''' 1981 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 1982 ''' % { "reg" : reg } 1983 if readDest: 1984 for reg in range(4): 1985 eWalkCode += ''' 1986 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw); 1987 ''' % { "reg" : reg } 1988 readDestCode = '' 1989 if readDest: 1990 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 1991 eWalkCode += ''' 1992 for (unsigned i = 0; i < eCount; i++) { 1993 Element srcElem1 = gtoh(srcReg1.elements[i]); 1994 BigElement destElem; 1995 %(readDest)s 1996 %(op)s 1997 destReg.elements[i] = htog(destElem); 1998 } 1999 ''' % { "op" : op, "readDest" : readDestCode } 2000 for reg in range(4): 2001 eWalkCode += ''' 2002 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 2003 ''' % { "reg" : reg } 2004 iop = InstObjParams(name, Name, 2005 "RegRegOp", 2006 { "code": eWalkCode, 2007 "r_count": 2, 2008 "predicate_test": predicateTest, 2009 "op_class": opClass }, []) 2010 header_output += NeonRegRegOpDeclare.subst(iop) 2011 exec_output += NeonUnequalRegExecute.subst(iop) 2012 for type in types: 2013 substDict = { "targs" : type, 2014 "class_name" : Name } 2015 exec_output += NeonExecDeclare.subst(substDict) 2016 2017 vhaddCode = ''' 2018 Element carryBit = 2019 (((unsigned)srcElem1 & 0x1) + 2020 ((unsigned)srcElem2 & 0x1)) >> 1; 2021 // Use division instead of a shift to ensure the sign extension works 2022 // right. The compiler will figure out if it can be a shift. Mask the 2023 // inputs so they get truncated correctly. 
2024 destElem = (((srcElem1 & ~(Element)1) / 2) + 2025 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2026 ''' 2027 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode) 2028 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode) 2029 2030 vrhaddCode = ''' 2031 Element carryBit = 2032 (((unsigned)srcElem1 & 0x1) + 2033 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 2034 // Use division instead of a shift to ensure the sign extension works 2035 // right. The compiler will figure out if it can be a shift. Mask the 2036 // inputs so they get truncated correctly. 2037 destElem = (((srcElem1 & ~(Element)1) / 2) + 2038 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2039 ''' 2040 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode) 2041 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode) 2042 2043 vhsubCode = ''' 2044 Element barrowBit = 2045 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 2046 // Use division instead of a shift to ensure the sign extension works 2047 // right. The compiler will figure out if it can be a shift. Mask the 2048 // inputs so they get truncated correctly. 
2049 destElem = (((srcElem1 & ~(Element)1) / 2) - 2050 ((srcElem2 & ~(Element)1) / 2)) - barrowBit; 2051 ''' 2052 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode) 2053 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode) 2054 2055 vandCode = ''' 2056 destElem = srcElem1 & srcElem2; 2057 ''' 2058 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode) 2059 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode) 2060 2061 vbicCode = ''' 2062 destElem = srcElem1 & ~srcElem2; 2063 ''' 2064 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode) 2065 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode) 2066 2067 vorrCode = ''' 2068 destElem = srcElem1 | srcElem2; 2069 ''' 2070 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode) 2071 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode) 2072 2073 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode) 2074 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode) 2075 2076 vornCode = ''' 2077 destElem = srcElem1 | ~srcElem2; 2078 ''' 2079 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode) 2080 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode) 2081 2082 veorCode = ''' 2083 destElem = srcElem1 ^ srcElem2; 2084 ''' 2085 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode) 2086 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode) 2087 2088 vbifCode = ''' 2089 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2); 2090 ''' 2091 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True) 2092 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True) 2093 vbitCode = ''' 2094 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2); 2095 ''' 2096 threeEqualRegInst("vbit", 
"VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True) 2097 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True) 2098 vbslCode = ''' 2099 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem); 2100 ''' 2101 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True) 2102 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True) 2103 2104 vmaxCode = ''' 2105 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2; 2106 ''' 2107 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode) 2108 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode) 2109 2110 vminCode = ''' 2111 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2; 2112 ''' 2113 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode) 2114 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode) 2115 2116 vaddCode = ''' 2117 destElem = srcElem1 + srcElem2; 2118 ''' 2119 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode) 2120 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode) 2121 2122 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes, 2123 2, vaddCode, pairwise=True) 2124 vaddlwCode = ''' 2125 destElem = (BigElement)srcElem1 + (BigElement)srcElem2; 2126 ''' 2127 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode) 2128 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode) 2129 vaddhnCode = ''' 2130 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 2131 (sizeof(Element) * 8); 2132 ''' 2133 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode) 2134 vraddhnCode = ''' 2135 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + 2136 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 2137 (sizeof(Element) * 8); 2138 ''' 2139 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode) 2140 2141 vsubCode = ''' 2142 
destElem = srcElem1 - srcElem2; 2143 ''' 2144 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode) 2145 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode) 2146 vsublwCode = ''' 2147 destElem = (BigElement)srcElem1 - (BigElement)srcElem2; 2148 ''' 2149 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode) 2150 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode) 2151 2152 vqaddUCode = ''' 2153 destElem = srcElem1 + srcElem2; 2154 FPSCR fpscr = (FPSCR) FpscrQc; 2155 if (destElem < srcElem1 || destElem < srcElem2) { 2156 destElem = (Element)(-1); 2157 fpscr.qc = 1; 2158 } 2159 FpscrQc = fpscr; 2160 ''' 2161 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode) 2162 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode) 2163 vsubhnCode = ''' 2164 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> 2165 (sizeof(Element) * 8); 2166 ''' 2167 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode) 2168 vrsubhnCode = ''' 2169 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + 2170 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 2171 (sizeof(Element) * 8); 2172 ''' 2173 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode) 2174 2175 vqaddSCode = ''' 2176 destElem = srcElem1 + srcElem2; 2177 FPSCR fpscr = (FPSCR) FpscrQc; 2178 bool negDest = (destElem < 0); 2179 bool negSrc1 = (srcElem1 < 0); 2180 bool negSrc2 = (srcElem2 < 0); 2181 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { 2182 if (negDest) 2183 /* If (>=0) plus (>=0) yields (<0), saturate to +. */ 2184 destElem = std::numeric_limits<Element>::max(); 2185 else 2186 /* If (<0) plus (<0) yields (>=0), saturate to -. 
*/ 2187 destElem = std::numeric_limits<Element>::min(); 2188 fpscr.qc = 1; 2189 } 2190 FpscrQc = fpscr; 2191 ''' 2192 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode) 2193 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode) 2194 2195 vqsubUCode = ''' 2196 destElem = srcElem1 - srcElem2; 2197 FPSCR fpscr = (FPSCR) FpscrQc; 2198 if (destElem > srcElem1) { 2199 destElem = 0; 2200 fpscr.qc = 1; 2201 } 2202 FpscrQc = fpscr; 2203 ''' 2204 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode) 2205 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode) 2206 2207 vqsubSCode = ''' 2208 destElem = srcElem1 - srcElem2; 2209 FPSCR fpscr = (FPSCR) FpscrQc; 2210 bool negDest = (destElem < 0); 2211 bool negSrc1 = (srcElem1 < 0); 2212 bool posSrc2 = (srcElem2 >= 0); 2213 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { 2214 if (negDest) 2215 /* If (>=0) minus (<0) yields (<0), saturate to +. */ 2216 destElem = std::numeric_limits<Element>::max(); 2217 else 2218 /* If (<0) minus (>=0) yields (>=0), saturate to -. */ 2219 destElem = std::numeric_limits<Element>::min(); 2220 fpscr.qc = 1; 2221 } 2222 FpscrQc = fpscr; 2223 ''' 2224 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode) 2225 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode) 2226 2227 vcgtCode = ''' 2228 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0; 2229 ''' 2230 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode) 2231 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode) 2232 2233 vcgeCode = ''' 2234 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0; 2235 ''' 2236 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode) 2237 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode) 2238 2239 vceqCode = ''' 2240 destElem = (srcElem1 == srcElem2) ? 
(Element)(-1) : 0; 2241 ''' 2242 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode) 2243 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode) 2244 2245 vshlCode = ''' 2246 int16_t shiftAmt = (int8_t)srcElem2; 2247 if (shiftAmt < 0) { 2248 shiftAmt = -shiftAmt; 2249 if (shiftAmt >= sizeof(Element) * 8) { 2250 shiftAmt = sizeof(Element) * 8 - 1; 2251 destElem = 0; 2252 } else { 2253 destElem = (srcElem1 >> shiftAmt); 2254 } 2255 // Make sure the right shift sign extended when it should. 2256 if (ltz(srcElem1) && !ltz(destElem)) { 2257 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2258 1 - shiftAmt)); 2259 } 2260 } else { 2261 if (shiftAmt >= sizeof(Element) * 8) { 2262 destElem = 0; 2263 } else { 2264 destElem = srcElem1 << shiftAmt; 2265 } 2266 } 2267 ''' 2268 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode) 2269 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode) 2270 2271 vrshlCode = ''' 2272 int16_t shiftAmt = (int8_t)srcElem2; 2273 if (shiftAmt < 0) { 2274 shiftAmt = -shiftAmt; 2275 Element rBit = 0; 2276 if (shiftAmt <= sizeof(Element) * 8) 2277 rBit = bits(srcElem1, shiftAmt - 1); 2278 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) 2279 rBit = 1; 2280 if (shiftAmt >= sizeof(Element) * 8) { 2281 shiftAmt = sizeof(Element) * 8 - 1; 2282 destElem = 0; 2283 } else { 2284 destElem = (srcElem1 >> shiftAmt); 2285 } 2286 // Make sure the right shift sign extended when it should. 
2287 if (ltz(srcElem1) && !ltz(destElem)) { 2288 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2289 1 - shiftAmt)); 2290 } 2291 destElem += rBit; 2292 } else if (shiftAmt > 0) { 2293 if (shiftAmt >= sizeof(Element) * 8) { 2294 destElem = 0; 2295 } else { 2296 destElem = srcElem1 << shiftAmt; 2297 } 2298 } else { 2299 destElem = srcElem1; 2300 } 2301 ''' 2302 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode) 2303 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode) 2304 2305 vqshlUCode = ''' 2306 int16_t shiftAmt = (int8_t)srcElem2; 2307 FPSCR fpscr = (FPSCR) FpscrQc; 2308 if (shiftAmt < 0) { 2309 shiftAmt = -shiftAmt; 2310 if (shiftAmt >= sizeof(Element) * 8) { 2311 shiftAmt = sizeof(Element) * 8 - 1; 2312 destElem = 0; 2313 } else { 2314 destElem = (srcElem1 >> shiftAmt); 2315 } 2316 } else if (shiftAmt > 0) { 2317 if (shiftAmt >= sizeof(Element) * 8) { 2318 if (srcElem1 != 0) { 2319 destElem = mask(sizeof(Element) * 8); 2320 fpscr.qc = 1; 2321 } else { 2322 destElem = 0; 2323 } 2324 } else { 2325 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2326 sizeof(Element) * 8 - shiftAmt)) { 2327 destElem = mask(sizeof(Element) * 8); 2328 fpscr.qc = 1; 2329 } else { 2330 destElem = srcElem1 << shiftAmt; 2331 } 2332 } 2333 } else { 2334 destElem = srcElem1; 2335 } 2336 FpscrQc = fpscr; 2337 ''' 2338 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode) 2339 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode) 2340 2341 vqshlSCode = ''' 2342 int16_t shiftAmt = (int8_t)srcElem2; 2343 FPSCR fpscr = (FPSCR) FpscrQc; 2344 if (shiftAmt < 0) { 2345 shiftAmt = -shiftAmt; 2346 if (shiftAmt >= sizeof(Element) * 8) { 2347 shiftAmt = sizeof(Element) * 8 - 1; 2348 destElem = 0; 2349 } else { 2350 destElem = (srcElem1 >> shiftAmt); 2351 } 2352 // Make sure the right shift sign extended when it should. 
2353 if (srcElem1 < 0 && destElem >= 0) { 2354 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2355 1 - shiftAmt)); 2356 } 2357 } else if (shiftAmt > 0) { 2358 bool sat = false; 2359 if (shiftAmt >= sizeof(Element) * 8) { 2360 if (srcElem1 != 0) 2361 sat = true; 2362 else 2363 destElem = 0; 2364 } else { 2365 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2366 sizeof(Element) * 8 - 1 - shiftAmt) != 2367 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2368 sat = true; 2369 } else { 2370 destElem = srcElem1 << shiftAmt; 2371 } 2372 } 2373 if (sat) { 2374 fpscr.qc = 1; 2375 destElem = mask(sizeof(Element) * 8 - 1); 2376 if (srcElem1 < 0) 2377 destElem = ~destElem; 2378 } 2379 } else { 2380 destElem = srcElem1; 2381 } 2382 FpscrQc = fpscr; 2383 ''' 2384 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode) 2385 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode) 2386 2387 vqrshlUCode = ''' 2388 int16_t shiftAmt = (int8_t)srcElem2; 2389 FPSCR fpscr = (FPSCR) FpscrQc; 2390 if (shiftAmt < 0) { 2391 shiftAmt = -shiftAmt; 2392 Element rBit = 0; 2393 if (shiftAmt <= sizeof(Element) * 8) 2394 rBit = bits(srcElem1, shiftAmt - 1); 2395 if (shiftAmt >= sizeof(Element) * 8) { 2396 shiftAmt = sizeof(Element) * 8 - 1; 2397 destElem = 0; 2398 } else { 2399 destElem = (srcElem1 >> shiftAmt); 2400 } 2401 destElem += rBit; 2402 } else { 2403 if (shiftAmt >= sizeof(Element) * 8) { 2404 if (srcElem1 != 0) { 2405 destElem = mask(sizeof(Element) * 8); 2406 fpscr.qc = 1; 2407 } else { 2408 destElem = 0; 2409 } 2410 } else { 2411 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2412 sizeof(Element) * 8 - shiftAmt)) { 2413 destElem = mask(sizeof(Element) * 8); 2414 fpscr.qc = 1; 2415 } else { 2416 destElem = srcElem1 << shiftAmt; 2417 } 2418 } 2419 } 2420 FpscrQc = fpscr; 2421 ''' 2422 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode) 2423 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, 
vqrshlUCode) 2424 2425 vqrshlSCode = ''' 2426 int16_t shiftAmt = (int8_t)srcElem2; 2427 FPSCR fpscr = (FPSCR) FpscrQc; 2428 if (shiftAmt < 0) { 2429 shiftAmt = -shiftAmt; 2430 Element rBit = 0; 2431 if (shiftAmt <= sizeof(Element) * 8) 2432 rBit = bits(srcElem1, shiftAmt - 1); 2433 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) 2434 rBit = 1; 2435 if (shiftAmt >= sizeof(Element) * 8) { 2436 shiftAmt = sizeof(Element) * 8 - 1; 2437 destElem = 0; 2438 } else { 2439 destElem = (srcElem1 >> shiftAmt); 2440 } 2441 // Make sure the right shift sign extended when it should. 2442 if (srcElem1 < 0 && destElem >= 0) { 2443 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2444 1 - shiftAmt)); 2445 } 2446 destElem += rBit; 2447 } else if (shiftAmt > 0) { 2448 bool sat = false; 2449 if (shiftAmt >= sizeof(Element) * 8) { 2450 if (srcElem1 != 0) 2451 sat = true; 2452 else 2453 destElem = 0; 2454 } else { 2455 if (bits(srcElem1, sizeof(Element) * 8 - 1, 2456 sizeof(Element) * 8 - 1 - shiftAmt) != 2457 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2458 sat = true; 2459 } else { 2460 destElem = srcElem1 << shiftAmt; 2461 } 2462 } 2463 if (sat) { 2464 fpscr.qc = 1; 2465 destElem = mask(sizeof(Element) * 8 - 1); 2466 if (srcElem1 < 0) 2467 destElem = ~destElem; 2468 } 2469 } else { 2470 destElem = srcElem1; 2471 } 2472 FpscrQc = fpscr; 2473 ''' 2474 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode) 2475 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode) 2476 2477 vabaCode = ''' 2478 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : 2479 (srcElem2 - srcElem1); 2480 ''' 2481 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True) 2482 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True) 2483 vabalCode = ''' 2484 destElem += (srcElem1 > srcElem2) ? 
2485 ((BigElement)srcElem1 - (BigElement)srcElem2) : 2486 ((BigElement)srcElem2 - (BigElement)srcElem1); 2487 ''' 2488 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True) 2489 2490 vabdCode = ''' 2491 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : 2492 (srcElem2 - srcElem1); 2493 ''' 2494 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode) 2495 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode) 2496 vabdlCode = ''' 2497 destElem = (srcElem1 > srcElem2) ? 2498 ((BigElement)srcElem1 - (BigElement)srcElem2) : 2499 ((BigElement)srcElem2 - (BigElement)srcElem1); 2500 ''' 2501 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode) 2502 2503 vtstCode = ''' 2504 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0; 2505 ''' 2506 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode) 2507 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode) 2508 2509 vmulCode = ''' 2510 destElem = srcElem1 * srcElem2; 2511 ''' 2512 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode) 2513 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode) 2514 vmullCode = ''' 2515 destElem = (BigElement)srcElem1 * (BigElement)srcElem2; 2516 ''' 2517 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode) 2518 2519 vmlaCode = ''' 2520 destElem = destElem + srcElem1 * srcElem2; 2521 ''' 2522 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True) 2523 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True) 2524 vmlalCode = ''' 2525 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2; 2526 ''' 2527 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True) 2528 2529 vqdmlalCode = ''' 2530 FPSCR fpscr = (FPSCR) FpscrQc; 2531 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2532 Element maxNeg = 
std::numeric_limits<Element>::min(); 2533 Element halfNeg = maxNeg / 2; 2534 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2535 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2536 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2537 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2538 fpscr.qc = 1; 2539 } 2540 bool negPreDest = ltz(destElem); 2541 destElem += midElem; 2542 bool negDest = ltz(destElem); 2543 bool negMid = ltz(midElem); 2544 if (negPreDest == negMid && negMid != negDest) { 2545 destElem = mask(sizeof(BigElement) * 8 - 1); 2546 if (negPreDest) 2547 destElem = ~destElem; 2548 fpscr.qc = 1; 2549 } 2550 FpscrQc = fpscr; 2551 ''' 2552 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True) 2553 2554 vqdmlslCode = ''' 2555 FPSCR fpscr = (FPSCR) FpscrQc; 2556 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2557 Element maxNeg = std::numeric_limits<Element>::min(); 2558 Element halfNeg = maxNeg / 2; 2559 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2560 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2561 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2562 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2563 fpscr.qc = 1; 2564 } 2565 bool negPreDest = ltz(destElem); 2566 destElem -= midElem; 2567 bool negDest = ltz(destElem); 2568 bool posMid = ltz((BigElement)-midElem); 2569 if (negPreDest == posMid && posMid != negDest) { 2570 destElem = mask(sizeof(BigElement) * 8 - 1); 2571 if (negPreDest) 2572 destElem = ~destElem; 2573 fpscr.qc = 1; 2574 } 2575 FpscrQc = fpscr; 2576 ''' 2577 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True) 2578 2579 vqdmullCode = ''' 2580 FPSCR fpscr = (FPSCR) FpscrQc; 2581 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2582 if (srcElem1 == srcElem2 && 2583 srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2584 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); 2585 fpscr.qc = 1; 2586 } 2587 
FpscrQc = fpscr; 2588 ''' 2589 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode) 2590 2591 vmlsCode = ''' 2592 destElem = destElem - srcElem1 * srcElem2; 2593 ''' 2594 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True) 2595 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True) 2596 vmlslCode = ''' 2597 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2; 2598 ''' 2599 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True) 2600 2601 vmulpCode = ''' 2602 destElem = 0; 2603 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2604 if (bits(srcElem2, j)) 2605 destElem ^= srcElem1 << j; 2606 } 2607 ''' 2608 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode) 2609 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode) 2610 vmullpCode = ''' 2611 destElem = 0; 2612 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 2613 if (bits(srcElem2, j)) 2614 destElem ^= (BigElement)srcElem1 << j; 2615 } 2616 ''' 2617 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode) 2618 2619 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True) 2620 2621 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True) 2622 2623 vqdmulhCode = ''' 2624 FPSCR fpscr = (FPSCR) FpscrQc; 2625 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2626 (sizeof(Element) * 8); 2627 if (srcElem1 == srcElem2 && 2628 srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2629 destElem = ~srcElem1; 2630 fpscr.qc = 1; 2631 } 2632 FpscrQc = fpscr; 2633 ''' 2634 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) 2635 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) 2636 2637 vqrdmulhCode = ''' 2638 FPSCR fpscr = (FPSCR) FpscrQc; 2639 destElem = (2 * 
(int64_t)srcElem1 * (int64_t)srcElem2 + 2640 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2641 (sizeof(Element) * 8); 2642 Element maxNeg = std::numeric_limits<Element>::min(); 2643 Element halfNeg = maxNeg / 2; 2644 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2645 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2646 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2647 if (destElem < 0) { 2648 destElem = mask(sizeof(Element) * 8 - 1); 2649 } else { 2650 destElem = std::numeric_limits<Element>::min(); 2651 } 2652 fpscr.qc = 1; 2653 } 2654 FpscrQc = fpscr; 2655 ''' 2656 threeEqualRegInst("vqrdmulh", "VqrdmulhD", 2657 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) 2658 threeEqualRegInst("vqrdmulh", "VqrdmulhQ", 2659 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) 2660 2661 vMinMaxFpCode = ''' 2662 destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr); 2663 ''' 2664 vMinMaxInsts = [ 2665 ("vmax", "VmaxDFp", 2, "Max", False, ), 2666 ("vmax", "VmaxQFp", 4, "Max", False, ), 2667 ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ), 2668 ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ), 2669 ("vpmax", "VpmaxDFp", 2, "Max", True, ), 2670 ("vpmax", "VpmaxQFp", 4, "Max", True, ), 2671 ("vmin", "VminDFp", 2, "Min", False, ), 2672 ("vmin", "VminQFp", 4, "Min", False, ), 2673 ("vminnm", "VminnmDFp", 2, "MinNum", False, ), 2674 ("vminnm", "VminnmQFp", 4, "MinNum", False, ), 2675 ("vpmin", "VpminDFp", 2, "Min", True, ), 2676 ("vpmin", "VpminQFp", 4, "Min", True, ), 2677 ] 2678 for name, Name, rCount, op, pairwise in vMinMaxInsts: 2679 threeEqualRegInst( 2680 name, 2681 Name, 2682 "SimdFloatCmpOp", 2683 ("uint32_t",), 2684 rCount, 2685 vMinMaxFpCode % op, 2686 pairwise=pairwise, 2687 standardFpcsr=True, 2688 ) 2689 2690 vaddfpCode = ''' 2691 FPSCR fpscr = (FPSCR) FpscrExc; 2692 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, 2693 true, true, VfpRoundNearest); 2694 FpscrExc = fpscr; 2695 ''' 2696 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode) 
# Q form of the float vector add.  vaddfpCode is assigned earlier in the
# file, next to the matching VaddDFp registration.  Throughout this section
# the D and Q classes of an instruction differ only in the count argument
# (2 vs 4) that follows the type tuple.
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode)

# Pairwise float add reuses the same snippet with pairwise=True.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)

# Shape shared by the plain two-operand float snippets below: copy FpscrExc
# into a local FPSCR, call binaryOp() with the helper named by %s, then
# store the local FPSCR back into FpscrExc.
_fpBinaryOpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, %s,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
'''

# Shape shared by the float compare snippets below.  destReg becomes -1 when
# the helper's result compares equal to 0 and 0 otherwise; a result of
# exactly 2.0 additionally sets fpscr.ioc.
# NOTE(review): the compare helpers (vcgtFunc, ...) live outside this part
# of the file -- confirm their return convention there.
_fpCompareCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, %s,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
'''

# Float subtract.
vsubfpCode = _fpBinaryOpCode % "fpSubS"
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",),
                    2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",),
                    4, vsubfpCode)

# Float multiply.
vmulfpCode = _fpBinaryOpCode % "fpMulS"
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",),
                    2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",),
                    4, vmulfpCode)

# Multiply-accumulate as two separate binaryOp() calls: the product (mid)
# is formed first and then added to destReg.
vmlafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",),
                    2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",),
                    4, vmlafpCode, True)

# Multiply-add through a single ternaryOp() call with fpMulAdd<float>.
vfmafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
'''
threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",),
                    2, vfmafpCode, True)
threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",),
                    4, vfmafpCode, True)

# Same ternaryOp() call as vfma, but the first multiplicand is negated.
vfmsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
'''
threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",),
                    2, vfmsfpCode, True)
threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",),
                    4, vfmsfpCode, True)

# Multiply-subtract as two binaryOp() calls: the product (mid) is
# subtracted from destReg.
vmlsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",),
                    2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",),
                    4, vmlsfpCode, True)

# Float compares.  All five share _fpCompareCode and are registered with
# toInt=True.
vcgtfpCode = _fpCompareCode % "vcgtFunc"
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgtfpCode, toInt=True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgtfpCode, toInt=True)

vcgefpCode = _fpCompareCode % "vcgeFunc"
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vcgefpCode, toInt=True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vcgefpCode, toInt=True)

vacgtfpCode = _fpCompareCode % "vacgtFunc"
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgtfpCode, toInt=True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgtfpCode, toInt=True)

vacgefpCode = _fpCompareCode % "vacgeFunc"
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
                    2, vacgefpCode, toInt=True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
                    4, vacgefpCode, toInt=True)

vceqfpCode = _fpCompareCode % "vceqFunc"
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
                    2, vceqfpCode, toInt=True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
                    4, vceqfpCode, toInt=True)

# vrecps / vrsqrts: plain binaryOp() snippets using fpRecpsS / fpRSqrtsS.
vrecpsCode = _fpBinaryOpCode % "fpRecpsS"
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",),
                    2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",),
                    4, vrecpsCode)

vrsqrtsCode = _fpBinaryOpCode % "fpRSqrtsS"
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",),
                    2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",),
                    4, vrsqrtsCode)

# Absolute difference: fabs() of the fpSubS result.
vabdfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                             true, true, VfpRoundNearest);
        destReg = fabs(mid);
        FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",),
                    2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",),
                    4, vabdfpCode)

# The registrations below reuse snippets assigned earlier in the file with
# the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst generators; the
# class names carry an extra "s".
# NOTE(review): presumably these are the by-scalar encodings -- the
# generators are defined outside this part of the file, confirm there.
# NOTE(review): vmla registers unsignedTypes while vmls and vmul register
# allTypes; kept exactly as found.
twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes,
                2, vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes,
                4, vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",),
                  2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",),
                  4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
               vmlalCode, True)

twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes,
                2, vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes,
                4, vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",),
                  2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",),
                  4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
               vmlslCode, True)

twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",),
                  2, vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",),
                  4, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes,
               vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes,
                2, vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes,
                4, vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD", "SimdMultOp", smallSignedTypes,
                2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", "SimdMultOp", smallSignedTypes,
                4, vqrdmulhCode)

# Shift right by immediate.  When imm reaches the element width the result
# is -1 if ltz(srcElem1) holds and 0 otherwise; below that it is a plain
# ">>" of the source element.
vshrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            if (ltz(srcElem1))
                destElem = -1;
            else
                destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)

# Shift right and accumulate: mid is computed like vshr and then added to
# destElem.  If the source is negative (per ltz) but the shifted value is
# not, the missing upper bits are OR-ed back in.
# NOTE(review): the doubled ';' after "Element mid" is a harmless empty
# statement in the generated C++; it is kept verbatim.
vsraCode = '''
        Element mid;;
        if (imm >= sizeof(srcElem1) * 8) {
            mid = ltz(srcElem1) ? -1 : 0;
        } else {
            mid = srcElem1 >> imm;
            if (ltz(srcElem1) && !ltz(mid)) {
                mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
            }
        }
        destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes,
                2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes,
                4, vsraCode, True)

# Rounding shift right.  rBit is bit (imm - 1) of the source, i.e. the last
# bit shifted out, and is added to the shifted value.  The shift is split
# into (imm - 1) followed by 1, which keeps each individual shift count
# below the element width even when imm equals it.
vrshrCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)

# Rounding shift right and accumulate: same computation as vrshrCode, with
# the result added to destElem ("+= 0" in the first branch is a no-op kept
# verbatim).  The twoRegShiftInst registrations for this snippet follow
# immediately after this point in the file.
vrsraCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem += 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem += srcElem1;
        }
'''
# D and Q classes for vrsraCode, which is assigned immediately before this
# point in the file.  Each pair below is (class name, count argument).
for _cls, _rCount in (("NVrsraD", 2), ("NVrsraQ", 4)):
    twoRegShiftInst("vrsra", _cls, "SimdShiftAccOp", allTypes, _rCount,
                    vrsraCode, True)

# Shift right and insert.  For imm below the element width the shifted
# source is OR-ed with the destElem bits selected by
# ~mask(width - imm); for larger imm destElem is left as it is (the
# self-assignment is a no-op kept verbatim).
# NOTE(review): mask() is defined outside this file section -- presumably
# mask(n) is the low-n-bits mask, which would make the kept bits the top
# imm bits of destElem; confirm.
vsriCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 >> imm) |
                (destElem & ~mask(sizeof(Element) * 8 - imm));
        }
'''
for _cls, _rCount in (("NVsriD", 2), ("NVsriQ", 4)):
    twoRegShiftInst("vsri", _cls, "SimdShiftOp", unsignedTypes, _rCount,
                    vsriCode, True)

# Shift left by immediate.  This rebinds vshlCode; the register-shift
# snippet that used the same name earlier in the file has already been
# registered by then.  For imm at or above the element width the shift is
# done as (width - 1) followed by 1 rather than in a single step.
vshlCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
        } else {
            destElem = srcElem1 << imm;
        }
'''
for _cls, _rCount in (("NVshlD", 2), ("NVshlQ", 4)):
    twoRegShiftInst("vshl", _cls, "SimdShiftOp", unsignedTypes, _rCount,
                    vshlCode)

# Shift left and insert.  For imm below the element width the shifted
# source is OR-ed with (destElem & mask(imm)); otherwise destElem is left
# as it is.
vsliCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 << imm) | (destElem & mask(imm));
        }
'''
for _cls, _rCount in (("NVsliD", 2), ("NVsliQ", 4)):
    twoRegShiftInst("vsli", _cls, "SimdShiftOp", unsignedTypes, _rCount,
                    vsliCode, True)

# Saturating shift left by immediate; the NVqshlD / NVqshlQ registrations
# that consume this snippet follow immediately after this point in the file.
#  - imm >= element width: any non-zero source saturates to
#    numeric_limits<Element>::min(), complemented when the source is
#    positive, and sets fpscr.qc; a zero source gives 0.
#  - 0 < imm < width: topBits is the bit field of the source from
#    (width - 1) down to (width - 1 - imm).  Unless it is 0 or equal to
#    mask(imm + 1), the result saturates the same way and sets fpscr.qc.
#  - imm == 0: the source is copied through.
vqshlCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - 1 - imm);
            if (topBits != 0 && topBits != mask(imm + 1)) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# D and Q forms of the signed saturating shift; vqshlCode is assigned
# immediately before these calls.
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)

# vqshlu with unsigned elements: if any of the bits shifted out is set the
# result becomes all ones and fpscr.qc is set.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)

# vqshlu with signed source elements: a negative source always yields 0
# with fpscr.qc set; a non-negative source that loses set bits yields all
# ones with fpscr.qc set.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)

# vshrn: right shift of the source element assigned to destElem; shifts of
# the full source width or more give 0.
vshrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)

# vrshrn: as vshrn, but bit (imm - 1) of the source is added to the result
# as a rounding increment.
vrshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)

# vqshrn (signed): the shifted value is sign-extended by hand; if it does
# not survive a round trip through Element the result saturates to
# mask(width - 1), complemented for a negative source, and fpscr.qc is set.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)

# vqshrun with unsigned elements: saturates to all ones (and sets
# fpscr.qc) when the shifted value does not fit in Element.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqshrunCode)

# vqshrun with signed source elements (the template continues past the end
# of this block).
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if
(bits(mid, sizeof(BigElement) * 8 - 1, 3144 sizeof(Element) * 8) != 0) { 3145 if (srcElem1 < 0) { 3146 destElem = 0; 3147 } else { 3148 destElem = mask(sizeof(Element) * 8); 3149 } 3150 fpscr.qc = 1; 3151 } else { 3152 destElem = mid; 3153 } 3154 } else { 3155 destElem = srcElem1; 3156 } 3157 FpscrQc = fpscr; 3158 ''' 3159 twoRegNarrowShiftInst("vqshrun", "NVqshruns", 3160 "SimdShiftOp", smallSignedTypes, vqshrunsCode) 3161 3162 vqrshrnCode = ''' 3163 FPSCR fpscr = (FPSCR) FpscrQc; 3164 if (imm > sizeof(srcElem1) * 8) { 3165 if (srcElem1 != 0 && srcElem1 != -1) 3166 fpscr.qc = 1; 3167 destElem = 0; 3168 } else if (imm) { 3169 BigElement mid = (srcElem1 >> (imm - 1)); 3170 uint64_t rBit = mid & 0x1; 3171 mid >>= 1; 3172 mid |= -(mid & ((BigElement)1 << 3173 (sizeof(BigElement) * 8 - 1 - imm))); 3174 mid += rBit; 3175 if (mid != (Element)mid) { 3176 destElem = mask(sizeof(Element) * 8 - 1); 3177 if (srcElem1 < 0) 3178 destElem = ~destElem; 3179 fpscr.qc = 1; 3180 } else { 3181 destElem = mid; 3182 } 3183 } else { 3184 if (srcElem1 != (Element)srcElem1) { 3185 destElem = mask(sizeof(Element) * 8 - 1); 3186 if (srcElem1 < 0) 3187 destElem = ~destElem; 3188 fpscr.qc = 1; 3189 } else { 3190 destElem = srcElem1; 3191 } 3192 } 3193 FpscrQc = fpscr; 3194 ''' 3195 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", 3196 "SimdShiftOp", smallSignedTypes, vqrshrnCode) 3197 3198 vqrshrunCode = ''' 3199 FPSCR fpscr = (FPSCR) FpscrQc; 3200 if (imm > sizeof(srcElem1) * 8) { 3201 if (srcElem1 != 0) 3202 fpscr.qc = 1; 3203 destElem = 0; 3204 } else if (imm) { 3205 BigElement mid = (srcElem1 >> (imm - 1)); 3206 uint64_t rBit = mid & 0x1; 3207 mid >>= 1; 3208 mid += rBit; 3209 if (mid != (Element)mid) { 3210 destElem = mask(sizeof(Element) * 8); 3211 fpscr.qc = 1; 3212 } else { 3213 destElem = mid; 3214 } 3215 } else { 3216 if (srcElem1 != (Element)srcElem1) { 3217 destElem = mask(sizeof(Element) * 8 - 1); 3218 fpscr.qc = 1; 3219 } else { 3220 destElem = srcElem1; 3221 } 3222 } 3223 FpscrQc = 
fpscr; 3224 ''' 3225 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", 3226 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) 3227 3228 vqrshrunsCode = ''' 3229 FPSCR fpscr = (FPSCR) FpscrQc; 3230 if (imm > sizeof(srcElem1) * 8) { 3231 if (srcElem1 != 0) 3232 fpscr.qc = 1; 3233 destElem = 0; 3234 } else if (imm) { 3235 BigElement mid = (srcElem1 >> (imm - 1)); 3236 uint64_t rBit = mid & 0x1; 3237 mid >>= 1; 3238 mid |= -(mid & ((BigElement)1 << 3239 (sizeof(BigElement) * 8 - 1 - imm))); 3240 mid += rBit; 3241 if (bits(mid, sizeof(BigElement) * 8 - 1, 3242 sizeof(Element) * 8) != 0) { 3243 if (srcElem1 < 0) { 3244 destElem = 0; 3245 } else { 3246 destElem = mask(sizeof(Element) * 8); 3247 } 3248 fpscr.qc = 1; 3249 } else { 3250 destElem = mid; 3251 } 3252 } else { 3253 if (srcElem1 < 0) { 3254 fpscr.qc = 1; 3255 destElem = 0; 3256 } else { 3257 destElem = srcElem1; 3258 } 3259 } 3260 FpscrQc = fpscr; 3261 ''' 3262 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", 3263 "SimdShiftOp", smallSignedTypes, vqrshrunsCode) 3264 3265 vshllCode = ''' 3266 if (imm >= sizeof(destElem) * 8) { 3267 destElem = 0; 3268 } else { 3269 destElem = (BigElement)srcElem1 << imm; 3270 } 3271 ''' 3272 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode) 3273 3274 vmovlCode = ''' 3275 destElem = srcElem1; 3276 ''' 3277 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) 3278 3279 vcvt2ufxCode = ''' 3280 FPSCR fpscr = (FPSCR) FpscrExc; 3281 if (flushToZero(srcElem1)) 3282 fpscr.idc = 1; 3283 VfpSavedState state = prepFpState(VfpRoundNearest); 3284 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); 3285 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); 3286 __asm__ __volatile__("" :: "m" (destReg)); 3287 finishVfp(fpscr, state, true); 3288 FpscrExc = fpscr; 3289 ''' 3290 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 3291 2, vcvt2ufxCode, toInt = True) 3292 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",), 
                4, vcvt2ufxCode, toInt = True)

# vcvt, float to fixed point with `true` as the second vfpFpToFixed
# argument (presumably "signed" -- confirm against the VFP helpers).
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
                2, vcvt2sfxCode, toInt = True)
twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
                4, vcvt2sfxCode, toInt = True)

# vcvt, fixed point to float through vfpUFixedToFpS.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
                2, vcvtu2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
                4, vcvtu2fpCode, fromInt = True)

# vcvt, fixed point to float through vfpSFixedToFpS.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
                2, vcvts2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
                4, vcvts2fpCode, fromInt = True)

# vcvt, single to half precision: the source bits are reinterpreted as a
# float with bitsToFp and converted by vcvtFpSFpH, which is passed
# fpscr.ahp.
vcvts2hCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)

# vcvt, half to single precision: vcvtFpHFpS produces a float whose bit
# pattern (fpToBits) is stored in destElem.
vcvth2sCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)

# vrsqrte on uint32_t elements: delegates to unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)

# vrsqrte on floats: delegates to fprSqrtEstimate(); fpscr.idc is set when
# flushToZero() reports a flush.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)

# vrecpe on uint32_t elements: delegates to unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)

# vrecpe on floats: delegates to fpRecipEstimate().
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)

# vrev16/32/64: the element is copied and j is set to i with its low bits
# flipped, where the group size is 2, 4 or 8 bytes divided by the element
# size. (i and j are declared by the surrounding generator -- presumably
# source and destination indices; confirm in twoRegMiscInst.)
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev32", "NVrev32D",
               "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q",
               "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)

# Start a new 'exec' output section and emit the accumulated compare
# instantiations (vcompares / vcomparesL are built earlier in the file).
split('exec')
exec_output += vcompares + vcomparesL

# vpaddl: sum of two source elements, each widened to BigElement first.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)

# vpadal: as vpaddl, but the sum is added to the existing destination.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4,
                   vpadalCode, True)

# vcls: after dropping the top bit, counts how many following bits keep the
# same sign as the original value, up to (element width - 1).
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)

# vclz: shifts left until the (signed) value turns negative, counting the
# steps; capped at the element width.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)

# vcnt: adds up the set bits of the source, one bit per iteration.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)

# vmvn (register form): bitwise complement.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

# vqabs: absolute value; the most negative value becomes its bitwise
# complement and sets fpscr.qc.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)

# vqneg: negation; the most negative value becomes its bitwise complement
# and sets fpscr.qc.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)

# vabs (integer): non-saturating absolute value.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs (float): clears the top bit of the value through a uint32_t/float
# union.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)

# vneg, integer and float forms.
vnegCode = '''
    destElem = -srcElem1;
'''
twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
vnegfpCode = '''
    destReg = -srcReg1;
'''
twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)

# Compare-against-zero family. The integer templates produce an all-ones
# element when the comparison holds and 0 otherwise. The float templates
# run the comparison functor through binaryOp() against 0.0: destReg is -1
# when binaryOp returns 0, and fpscr.ioc is set when it returns 2.0.
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgtfpCode, toInt = True)

vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgefpCode, toInt = True)

vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
                 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
                 4, vceqfpCode, toInt = True)

vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
                 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
                 4, vclefpCode, toInt = True)

vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ?
-1 : 0; 3630 if (res == 2.0) 3631 fpscr.ioc = 1; 3632 FpscrExc = fpscr; 3633 ''' 3634 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 3635 2, vcltfpCode, toInt = True) 3636 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",), 3637 4, vcltfpCode, toInt = True) 3638 3639 vswpCode = ''' 3640 uint32_t mid; 3641 for (unsigned r = 0; r < rCount; r++) { 3642 mid = srcReg1.regs[r]; 3643 srcReg1.regs[r] = destReg.regs[r]; 3644 destReg.regs[r] = mid; 3645 } 3646 ''' 3647 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode) 3648 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode) 3649 3650 vtrnCode = ''' 3651 Element mid; 3652 for (unsigned i = 0; i < eCount; i += 2) { 3653 mid = srcReg1.elements[i]; 3654 srcReg1.elements[i] = destReg.elements[i + 1]; 3655 destReg.elements[i + 1] = mid; 3656 } 3657 ''' 3658 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", 3659 smallUnsignedTypes, 2, vtrnCode) 3660 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", 3661 smallUnsignedTypes, 4, vtrnCode) 3662 3663 vuzpCode = ''' 3664 Element mid[eCount]; 3665 memcpy(&mid, &srcReg1, sizeof(srcReg1)); 3666 for (unsigned i = 0; i < eCount / 2; i++) { 3667 srcReg1.elements[i] = destReg.elements[2 * i + 1]; 3668 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1]; 3669 destReg.elements[i] = destReg.elements[2 * i]; 3670 } 3671 for (unsigned i = 0; i < eCount / 2; i++) { 3672 destReg.elements[eCount / 2 + i] = mid[2 * i]; 3673 } 3674 ''' 3675 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode) 3676 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode) 3677 3678 vzipCode = ''' 3679 Element mid[eCount]; 3680 memcpy(&mid, &destReg, sizeof(destReg)); 3681 for (unsigned i = 0; i < eCount / 2; i++) { 3682 destReg.elements[2 * i] = mid[i]; 3683 destReg.elements[2 * i + 1] = srcReg1.elements[i]; 3684 } 3685 for (int i = 0; i < eCount / 2; i++) { 3686 srcReg1.elements[2 * i] = 
mid[eCount / 2 + i]; 3687 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i]; 3688 } 3689 ''' 3690 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode) 3691 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode) 3692 3693 vmovnCode = 'destElem = srcElem1;' 3694 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode) 3695 3696 vdupCode = 'destElem = srcElem1;' 3697 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode) 3698 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode) 3699 3700 def vdupGprInst(name, Name, opClass, types, rCount): 3701 global header_output, exec_output 3702 eWalkCode = simdEnabledCheckCode + ''' 3703 RegVect destReg; 3704 for (unsigned i = 0; i < eCount; i++) { 3705 destReg.elements[i] = htog((Element)Op1); 3706 } 3707 ''' 3708 for reg in range(rCount): 3709 eWalkCode += ''' 3710 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3711 ''' % { "reg" : reg } 3712 iop = InstObjParams(name, Name, 3713 "RegRegOp", 3714 { "code": eWalkCode, 3715 "r_count": rCount, 3716 "predicate_test": predicateTest, 3717 "op_class": opClass }, []) 3718 header_output += NeonRegRegOpDeclare.subst(iop) 3719 exec_output += NeonEqualRegExecute.subst(iop) 3720 for type in types: 3721 substDict = { "targs" : type, 3722 "class_name" : Name } 3723 exec_output += NeonExecDeclare.subst(substDict) 3724 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2) 3725 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4) 3726 3727 vmovCode = 'destElem = imm;' 3728 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode) 3729 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode) 3730 3731 vorrCode = 'destElem |= imm;' 3732 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True) 3733 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, 
vorrCode, True) 3734 3735 vmvnCode = 'destElem = ~imm;' 3736 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode) 3737 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) 3738 3739 vbicCode = 'destElem &= ~imm;' 3740 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True) 3741 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) 3742 3743 vqmovnCode = ''' 3744 FPSCR fpscr = (FPSCR) FpscrQc; 3745 destElem = srcElem1; 3746 if ((BigElement)destElem != srcElem1) { 3747 fpscr.qc = 1; 3748 destElem = mask(sizeof(Element) * 8 - 1); 3749 if (srcElem1 < 0) 3750 destElem = ~destElem; 3751 } 3752 FpscrQc = fpscr; 3753 ''' 3754 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) 3755 3756 vqmovunCode = ''' 3757 FPSCR fpscr = (FPSCR) FpscrQc; 3758 destElem = srcElem1; 3759 if ((BigElement)destElem != srcElem1) { 3760 fpscr.qc = 1; 3761 destElem = mask(sizeof(Element) * 8); 3762 } 3763 FpscrQc = fpscr; 3764 ''' 3765 twoRegNarrowMiscInst("vqmovun", "NVqmovun", 3766 "SimdMiscOp", smallUnsignedTypes, vqmovunCode) 3767 3768 vqmovunsCode = ''' 3769 FPSCR fpscr = (FPSCR) FpscrQc; 3770 destElem = srcElem1; 3771 if (srcElem1 < 0 || 3772 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 3773 fpscr.qc = 1; 3774 destElem = mask(sizeof(Element) * 8); 3775 if (srcElem1 < 0) 3776 destElem = ~destElem; 3777 } 3778 FpscrQc = fpscr; 3779 ''' 3780 twoRegNarrowMiscInst("vqmovun", "NVqmovuns", 3781 "SimdMiscOp", smallSignedTypes, vqmovunsCode) 3782 3783 def buildVext(name, Name, opClass, types, rCount, op): 3784 global header_output, exec_output 3785 eWalkCode = simdEnabledCheckCode + ''' 3786 RegVect srcReg1, srcReg2, destReg; 3787 ''' 3788 for reg in range(rCount): 3789 eWalkCode += ''' 3790 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw); 3791 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw); 3792 ''' % { "reg" : reg } 3793 eWalkCode += op 3794 for reg in 
range(rCount): 3795 eWalkCode += ''' 3796 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 3797 ''' % { "reg" : reg } 3798 iop = InstObjParams(name, Name, 3799 "RegRegRegImmOp", 3800 { "code": eWalkCode, 3801 "r_count": rCount, 3802 "predicate_test": predicateTest, 3803 "op_class": opClass }, []) 3804 header_output += NeonRegRegRegImmOpDeclare.subst(iop) 3805 exec_output += NeonEqualRegExecute.subst(iop) 3806 for type in types: 3807 substDict = { "targs" : type, 3808 "class_name" : Name } 3809 exec_output += NeonExecDeclare.subst(substDict) 3810 3811 vextCode = ''' 3812 for (unsigned i = 0; i < eCount; i++) { 3813 unsigned index = i + imm; 3814 if (index < eCount) { 3815 destReg.elements[i] = srcReg1.elements[index]; 3816 } else { 3817 index -= eCount; 3818 if (index >= eCount) { 3819 fault = std::make_shared<UndefinedInstruction>(machInst, 3820 false, 3821 mnemonic); 3822 } else { 3823 destReg.elements[i] = srcReg2.elements[index]; 3824 } 3825 } 3826 } 3827 ''' 3828 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode) 3829 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode) 3830 3831 def buildVtbxl(name, Name, opClass, length, isVtbl): 3832 global header_output, decoder_output, exec_output 3833 code = simdEnabledCheckCode + ''' 3834 union 3835 { 3836 uint8_t bytes[32]; 3837 uint32_t regs[8]; 3838 } table; 3839 3840 union 3841 { 3842 uint8_t bytes[8]; 3843 uint32_t regs[2]; 3844 } destReg, srcReg2; 3845 3846 const unsigned length = %(length)d; 3847 const bool isVtbl = %(isVtbl)s; 3848 3849 srcReg2.regs[0] = htog(FpOp2P0_uw); 3850 srcReg2.regs[1] = htog(FpOp2P1_uw); 3851 3852 destReg.regs[0] = htog(FpDestP0_uw); 3853 destReg.regs[1] = htog(FpDestP1_uw); 3854 ''' % { "length" : length, "isVtbl" : isVtbl } 3855 for reg in range(8): 3856 if reg < length * 2: 3857 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \ 3858 { "reg" : reg } 3859 else: 3860 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg } 3861 code += ''' 3862 
for (unsigned i = 0; i < sizeof(destReg); i++) { 3863 uint8_t index = srcReg2.bytes[i]; 3864 if (index < 8 * length) { 3865 destReg.bytes[i] = table.bytes[index]; 3866 } else { 3867 if (isVtbl) 3868 destReg.bytes[i] = 0; 3869 // else destReg.bytes[i] unchanged 3870 } 3871 } 3872 3873 FpDestP0_uw = gtoh(destReg.regs[0]); 3874 FpDestP1_uw = gtoh(destReg.regs[1]); 3875 ''' 3876 iop = InstObjParams(name, Name, 3877 "RegRegRegOp", 3878 { "code": code, 3879 "predicate_test": predicateTest, 3880 "op_class": opClass }, []) 3881 header_output += RegRegRegOpDeclare.subst(iop) 3882 decoder_output += RegRegRegOpConstructor.subst(iop) 3883 exec_output += PredOpExecute.subst(iop) 3884 3885 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true") 3886 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true") 3887 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true") 3888 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true") 3889 3890 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false") 3891 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false") 3892 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false") 3893 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false") 3894}}; 3895