neon64.isa revision 10197
1// -*- mode: c++ -*- 2 3// Copyright (c) 2012-2013 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating 9// to a hardware implementation of the functionality of the software 10// licensed hereunder. You may use the software subject to the license 11// terms below provided that you ensure that this notice is replicated 12// unmodified and in its entirety in all distributions of the software, 13// modified or unmodified, in source code or in binary form. 14// 15// Redistribution and use in source and binary forms, with or without 16// modification, are permitted provided that the following conditions are 17// met: redistributions of source code must retain the above copyright 18// notice, this list of conditions and the following disclaimer; 19// redistributions in binary form must reproduce the above copyright 20// notice, this list of conditions and the following disclaimer in the 21// documentation and/or other materials provided with the distribution; 22// neither the name of the copyright holders nor the names of its 23// contributors may be used to endorse or promote products derived from 24// this software without specific prior written permission. 25// 26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Giacomo Gabrielli 39// Mbou Eyole 40 41let {{ 42 43 header_output = "" 44 exec_output = "" 45 46 # FP types (FP operations always work with unsigned representations) 47 floatTypes = ("uint32_t", "uint64_t") 48 smallFloatTypes = ("uint32_t",) 49 50 def threeEqualRegInstX(name, Name, opClass, types, rCount, op, 51 readDest=False, pairwise=False, scalar=False, 52 byElem=False): 53 assert (not pairwise) or ((not byElem) and (not scalar)) 54 global header_output, exec_output 55 eWalkCode = simd64EnabledCheckCode + ''' 56 RegVect srcReg1, destReg; 57 ''' 58 if byElem: 59 # 2nd register operand has to be read fully 60 eWalkCode += ''' 61 FullRegVect srcReg2; 62 ''' 63 else: 64 eWalkCode += ''' 65 RegVect srcReg2; 66 ''' 67 for reg in range(rCount): 68 eWalkCode += ''' 69 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 70 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 71 ''' % { "reg" : reg } 72 if readDest: 73 eWalkCode += ''' 74 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 75 ''' % { "reg" : reg } 76 if byElem: 77 # 2nd operand has to be read fully 78 for reg in range(rCount, 4): 79 eWalkCode += ''' 80 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 81 ''' % { "reg" : reg } 82 readDestCode = '' 83 if readDest: 84 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 85 if pairwise: 86 eWalkCode += ''' 87 for (unsigned i = 0; i < eCount; i++) { 88 Element srcElem1 = gtoh(2 * i < eCount ? 
89 srcReg1.elements[2 * i] : 90 srcReg2.elements[2 * i - eCount]); 91 Element srcElem2 = gtoh(2 * i < eCount ? 92 srcReg1.elements[2 * i + 1] : 93 srcReg2.elements[2 * i + 1 - eCount]); 94 Element destElem; 95 %(readDest)s 96 %(op)s 97 destReg.elements[i] = htog(destElem); 98 } 99 ''' % { "op" : op, "readDest" : readDestCode } 100 else: 101 scalarCheck = ''' 102 if (i != 0) { 103 destReg.elements[i] = 0; 104 continue; 105 } 106 ''' 107 eWalkCode += ''' 108 for (unsigned i = 0; i < eCount; i++) { 109 %(scalarCheck)s 110 Element srcElem1 = gtoh(srcReg1.elements[i]); 111 Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 112 Element destElem; 113 %(readDest)s 114 %(op)s 115 destReg.elements[i] = htog(destElem); 116 } 117 ''' % { "op" : op, "readDest" : readDestCode, 118 "scalarCheck" : scalarCheck if scalar else "", 119 "src2Index" : "imm" if byElem else "i" } 120 for reg in range(rCount): 121 eWalkCode += ''' 122 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 123 ''' % { "reg" : reg } 124 if rCount < 4: # zero upper half 125 for reg in range(rCount, 4): 126 eWalkCode += ''' 127 AA64FpDestP%(reg)d_uw = 0; 128 ''' % { "reg" : reg } 129 iop = InstObjParams(name, Name, 130 "DataX2RegImmOp" if byElem else "DataX2RegOp", 131 { "code": eWalkCode, 132 "r_count": rCount, 133 "op_class": opClass }, []) 134 if byElem: 135 header_output += NeonX2RegImmOpDeclare.subst(iop) 136 else: 137 header_output += NeonX2RegOpDeclare.subst(iop) 138 exec_output += NeonXEqualRegOpExecute.subst(iop) 139 for type in types: 140 substDict = { "targs" : type, 141 "class_name" : Name } 142 exec_output += NeonXExecDeclare.subst(substDict) 143 144 def threeUnequalRegInstX(name, Name, opClass, types, op, 145 bigSrc1, bigSrc2, bigDest, readDest, scalar=False, 146 byElem=False, hi=False): 147 assert not (scalar and hi) 148 global header_output, exec_output 149 src1Cnt = src2Cnt = destCnt = 2 150 src1Prefix = src2Prefix = destPrefix = '' 151 if bigSrc1: 152 src1Cnt = 4 153 src1Prefix = 'Big' 
154 if bigSrc2: 155 src2Cnt = 4 156 src2Prefix = 'Big' 157 if bigDest: 158 destCnt = 4 159 destPrefix = 'Big' 160 if byElem: 161 src2Prefix = 'Full' 162 eWalkCode = simd64EnabledCheckCode + ''' 163 %sRegVect srcReg1; 164 %sRegVect srcReg2; 165 %sRegVect destReg; 166 ''' % (src1Prefix, src2Prefix, destPrefix) 167 srcReg1 = 0 168 if hi and not bigSrc1: # long/widening operations 169 srcReg1 = 2 170 for reg in range(src1Cnt): 171 eWalkCode += ''' 172 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); 173 ''' % { "reg" : reg, "srcReg1" : srcReg1 } 174 srcReg1 += 1 175 srcReg2 = 0 176 if (not byElem) and (hi and not bigSrc2): # long/widening operations 177 srcReg2 = 2 178 for reg in range(src2Cnt): 179 eWalkCode += ''' 180 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); 181 ''' % { "reg" : reg, "srcReg2" : srcReg2 } 182 srcReg2 += 1 183 if byElem: 184 # 2nd operand has to be read fully 185 for reg in range(src2Cnt, 4): 186 eWalkCode += ''' 187 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 188 ''' % { "reg" : reg } 189 if readDest: 190 for reg in range(destCnt): 191 eWalkCode += ''' 192 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 193 ''' % { "reg" : reg } 194 readDestCode = '' 195 if readDest: 196 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 197 scalarCheck = ''' 198 if (i != 0) { 199 destReg.elements[i] = 0; 200 continue; 201 } 202 ''' 203 eWalkCode += ''' 204 for (unsigned i = 0; i < eCount; i++) { 205 %(scalarCheck)s 206 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 207 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 208 %(destPrefix)sElement destElem; 209 %(readDest)s 210 %(op)s 211 destReg.elements[i] = htog(destElem); 212 } 213 ''' % { "op" : op, "readDest" : readDestCode, 214 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 215 "destPrefix" : destPrefix, 216 "scalarCheck" : scalarCheck if scalar else "", 217 "src2Index" : "imm" if byElem else "i" } 218 destReg = 0 219 if hi and not 
bigDest: 220 # narrowing operations 221 destReg = 2 222 for reg in range(destCnt): 223 eWalkCode += ''' 224 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 225 ''' % { "reg" : reg, "destReg": destReg } 226 destReg += 1 227 if destCnt < 4 and not hi: # zero upper half 228 for reg in range(destCnt, 4): 229 eWalkCode += ''' 230 AA64FpDestP%(reg)d_uw = 0; 231 ''' % { "reg" : reg } 232 iop = InstObjParams(name, Name, 233 "DataX2RegImmOp" if byElem else "DataX2RegOp", 234 { "code": eWalkCode, 235 "r_count": 2, 236 "op_class": opClass }, []) 237 if byElem: 238 header_output += NeonX2RegImmOpDeclare.subst(iop) 239 else: 240 header_output += NeonX2RegOpDeclare.subst(iop) 241 exec_output += NeonXUnequalRegOpExecute.subst(iop) 242 for type in types: 243 substDict = { "targs" : type, 244 "class_name" : Name } 245 exec_output += NeonXExecDeclare.subst(substDict) 246 247 def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 248 scalar=False, byElem=False, hi=False): 249 assert not byElem 250 threeUnequalRegInstX(name, Name, opClass, types, op, 251 True, True, False, readDest, scalar, byElem, hi) 252 253 def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, 254 scalar=False, byElem=False, hi=False): 255 threeUnequalRegInstX(name, Name, opClass, types, op, 256 False, False, True, readDest, scalar, byElem, hi) 257 258 def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, 259 scalar=False, byElem=False, hi=False): 260 assert not byElem 261 threeUnequalRegInstX(name, Name, opClass, types, op, 262 True, False, True, readDest, scalar, byElem, hi) 263 264 def twoEqualRegInstX(name, Name, opClass, types, rCount, op, 265 readDest=False, scalar=False, byElem=False, 266 hasImm=False, isDup=False): 267 global header_output, exec_output 268 assert (not isDup) or byElem 269 if byElem: 270 hasImm = True 271 if isDup: 272 eWalkCode = simd64EnabledCheckCode + ''' 273 FullRegVect srcReg1; 274 RegVect destReg; 275 ''' 276 else: 277 
eWalkCode = simd64EnabledCheckCode + ''' 278 RegVect srcReg1, destReg; 279 ''' 280 for reg in range(4 if isDup else rCount): 281 eWalkCode += ''' 282 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 283 ''' % { "reg" : reg } 284 if readDest: 285 eWalkCode += ''' 286 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 287 ''' % { "reg" : reg } 288 readDestCode = '' 289 if readDest: 290 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 291 scalarCheck = ''' 292 if (i != 0) { 293 destReg.elements[i] = 0; 294 continue; 295 } 296 ''' 297 eWalkCode += ''' 298 for (unsigned i = 0; i < eCount; i++) { 299 %(scalarCheck)s 300 unsigned j = i; 301 Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); 302 Element destElem; 303 %(readDest)s 304 %(op)s 305 destReg.elements[j] = htog(destElem); 306 } 307 ''' % { "op" : op, "readDest" : readDestCode, 308 "scalarCheck" : scalarCheck if scalar else "", 309 "src1Index" : "imm" if byElem else "i" } 310 for reg in range(rCount): 311 eWalkCode += ''' 312 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 313 ''' % { "reg" : reg } 314 if rCount < 4: # zero upper half 315 for reg in range(rCount, 4): 316 eWalkCode += ''' 317 AA64FpDestP%(reg)d_uw = 0; 318 ''' % { "reg" : reg } 319 iop = InstObjParams(name, Name, 320 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 321 { "code": eWalkCode, 322 "r_count": rCount, 323 "op_class": opClass }, []) 324 if hasImm: 325 header_output += NeonX1RegImmOpDeclare.subst(iop) 326 else: 327 header_output += NeonX1RegOpDeclare.subst(iop) 328 exec_output += NeonXEqualRegOpExecute.subst(iop) 329 for type in types: 330 substDict = { "targs" : type, 331 "class_name" : Name } 332 exec_output += NeonXExecDeclare.subst(substDict) 333 334 def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, 335 hi=False, hasImm=False): 336 global header_output, exec_output 337 eWalkCode = simd64EnabledCheckCode + ''' 338 RegVect srcReg1; 339 BigRegVect destReg; 340 ''' 341 destReg = 0 if not hi else 2 342 
for reg in range(2): 343 eWalkCode += ''' 344 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); 345 ''' % { "reg" : reg, "destReg": destReg } 346 destReg += 1 347 destReg = 0 if not hi else 2 348 if readDest: 349 for reg in range(4): 350 eWalkCode += ''' 351 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 352 ''' % { "reg" : reg } 353 destReg += 1 354 readDestCode = '' 355 if readDest: 356 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 357 eWalkCode += ''' 358 for (unsigned i = 0; i < eCount; i++) { 359 Element srcElem1 = gtoh(srcReg1.elements[i]); 360 BigElement destElem; 361 %(readDest)s 362 %(op)s 363 destReg.elements[i] = htog(destElem); 364 } 365 ''' % { "op" : op, "readDest" : readDestCode } 366 for reg in range(4): 367 eWalkCode += ''' 368 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 369 ''' % { "reg" : reg } 370 iop = InstObjParams(name, Name, 371 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 372 { "code": eWalkCode, 373 "r_count": 2, 374 "op_class": opClass }, []) 375 if hasImm: 376 header_output += NeonX1RegImmOpDeclare.subst(iop) 377 else: 378 header_output += NeonX1RegOpDeclare.subst(iop) 379 exec_output += NeonXUnequalRegOpExecute.subst(iop) 380 for type in types: 381 substDict = { "targs" : type, 382 "class_name" : Name } 383 exec_output += NeonXExecDeclare.subst(substDict) 384 385 def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 386 scalar=False, hi=False, hasImm=False): 387 global header_output, exec_output 388 eWalkCode = simd64EnabledCheckCode + ''' 389 BigRegVect srcReg1; 390 RegVect destReg; 391 ''' 392 for reg in range(4): 393 eWalkCode += ''' 394 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 395 ''' % { "reg" : reg } 396 if readDest: 397 for reg in range(2): 398 eWalkCode += ''' 399 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 400 ''' % { "reg" : reg } 401 else: 402 eWalkCode += ''' 403 destReg.elements[0] = 0; 404 ''' % { "reg" : reg } 405 readDestCode = '' 406 if readDest: 407 
readDestCode = 'destElem = gtoh(destReg.elements[i]);' 408 scalarCheck = ''' 409 if (i != 0) { 410 destReg.elements[i] = 0; 411 continue; 412 } 413 ''' 414 eWalkCode += ''' 415 for (unsigned i = 0; i < eCount; i++) { 416 %(scalarCheck)s 417 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 418 Element destElem; 419 %(readDest)s 420 %(op)s 421 destReg.elements[i] = htog(destElem); 422 } 423 ''' % { "op" : op, "readDest" : readDestCode, 424 "scalarCheck" : scalarCheck if scalar else "" } 425 destReg = 0 if not hi else 2 426 for reg in range(2): 427 eWalkCode += ''' 428 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 429 ''' % { "reg" : reg, "destReg": destReg } 430 destReg += 1 431 if not hi: 432 for reg in range(2, 4): # zero upper half 433 eWalkCode += ''' 434 AA64FpDestP%(reg)d_uw = 0; 435 ''' % { "reg" : reg } 436 iop = InstObjParams(name, Name, 437 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 438 { "code": eWalkCode, 439 "r_count": 2, 440 "op_class": opClass }, []) 441 if hasImm: 442 header_output += NeonX1RegImmOpDeclare.subst(iop) 443 else: 444 header_output += NeonX1RegOpDeclare.subst(iop) 445 exec_output += NeonXUnequalRegOpExecute.subst(iop) 446 for type in types: 447 substDict = { "targs" : type, 448 "class_name" : Name } 449 exec_output += NeonXExecDeclare.subst(substDict) 450 451 def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): 452 global header_output, exec_output 453 eWalkCode = simd64EnabledCheckCode + ''' 454 RegVect srcReg1, srcReg2, destReg; 455 ''' 456 for reg in range(rCount): 457 eWalkCode += ''' 458 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 459 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 460 ''' % { "reg" : reg } 461 eWalkCode += op 462 for reg in range(rCount): 463 eWalkCode += ''' 464 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 465 ''' % { "reg" : reg } 466 if rCount < 4: 467 for reg in range(rCount, 4): 468 eWalkCode += ''' 469 AA64FpDestP%(reg)d_uw = 0; 470 ''' % { "reg" : reg } 471 iop 
= InstObjParams(name, Name, 472 "DataX2RegOp", 473 { "code": eWalkCode, 474 "r_count": rCount, 475 "op_class": opClass }, []) 476 header_output += NeonX2RegOpDeclare.subst(iop) 477 exec_output += NeonXEqualRegOpExecute.subst(iop) 478 for type in types: 479 substDict = { "targs" : type, 480 "class_name" : Name } 481 exec_output += NeonXExecDeclare.subst(substDict) 482 483 def insFromVecElemInstX(name, Name, opClass, types, rCount): 484 global header_output, exec_output 485 eWalkCode = simd64EnabledCheckCode + ''' 486 FullRegVect srcReg1; 487 RegVect destReg; 488 ''' 489 for reg in range(4): 490 eWalkCode += ''' 491 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 492 ''' % { "reg" : reg } 493 for reg in range(rCount): 494 eWalkCode += ''' 495 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 496 ''' % { "reg" : reg } 497 eWalkCode += ''' 498 Element srcElem1 = gtoh(srcReg1.elements[imm2]); 499 Element destElem = srcElem1; 500 destReg.elements[imm1] = htog(destElem); 501 ''' 502 for reg in range(rCount): 503 eWalkCode += ''' 504 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 505 ''' % { "reg" : reg } 506 iop = InstObjParams(name, Name, 507 "DataX1Reg2ImmOp", 508 { "code": eWalkCode, 509 "r_count": rCount, 510 "op_class": opClass }, []) 511 header_output += NeonX1Reg2ImmOpDeclare.subst(iop) 512 exec_output += NeonXEqualRegOpExecute.subst(iop) 513 for type in types: 514 substDict = { "targs" : type, 515 "class_name" : Name } 516 exec_output += NeonXExecDeclare.subst(substDict) 517 518 def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): 519 global header_output, exec_output 520 eWalkCode = simd64EnabledCheckCode + ''' 521 RegVect srcReg1, destReg; 522 ''' 523 for reg in range(rCount): 524 eWalkCode += ''' 525 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 526 ''' % { "reg" : reg } 527 eWalkCode += ''' 528 Element srcElem1 = gtoh(srcReg1.elements[0]); 529 Element srcElem2 = gtoh(srcReg1.elements[1]); 530 Element destElem; 531 %(op)s 532 
destReg.elements[0] = htog(destElem); 533 ''' % { "op" : op } 534 destCnt = rCount / 2 535 for reg in range(destCnt): 536 eWalkCode += ''' 537 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 538 ''' % { "reg" : reg } 539 for reg in range(destCnt, 4): # zero upper half 540 eWalkCode += ''' 541 AA64FpDestP%(reg)d_uw = 0; 542 ''' % { "reg" : reg } 543 iop = InstObjParams(name, Name, 544 "DataX1RegOp", 545 { "code": eWalkCode, 546 "r_count": rCount, 547 "op_class": opClass }, []) 548 header_output += NeonX1RegOpDeclare.subst(iop) 549 exec_output += NeonXEqualRegOpExecute.subst(iop) 550 for type in types: 551 substDict = { "targs" : type, 552 "class_name" : Name } 553 exec_output += NeonXExecDeclare.subst(substDict) 554 555 def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, 556 doubleDest=False, long=False): 557 global header_output, exec_output 558 destPrefix = "Big" if long else "" 559 eWalkCode = simd64EnabledCheckCode + ''' 560 RegVect srcReg1; 561 %sRegVect destReg; 562 ''' % destPrefix 563 for reg in range(rCount): 564 eWalkCode += ''' 565 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 566 ''' % { "reg" : reg } 567 eWalkCode += ''' 568 destReg.regs[0] = 0; 569 %(destPrefix)sElement destElem = 0; 570 for (unsigned i = 0; i < eCount; i++) { 571 Element srcElem1 = gtoh(srcReg1.elements[i]); 572 if (i == 0) { 573 destElem = srcElem1; 574 } else { 575 %(op)s 576 } 577 } 578 destReg.elements[0] = htog(destElem); 579 ''' % { "op" : op, "destPrefix" : destPrefix } 580 destCnt = 2 if doubleDest else 1 581 for reg in range(destCnt): 582 eWalkCode += ''' 583 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 584 ''' % { "reg" : reg } 585 for reg in range(destCnt, 4): # zero upper half 586 eWalkCode += ''' 587 AA64FpDestP%(reg)d_uw = 0; 588 ''' % { "reg" : reg } 589 iop = InstObjParams(name, Name, 590 "DataX1RegOp", 591 { "code": eWalkCode, 592 "r_count": rCount, 593 "op_class": opClass }, []) 594 header_output += NeonX1RegOpDeclare.subst(iop) 595 if 
long: 596 exec_output += NeonXUnequalRegOpExecute.subst(iop) 597 else: 598 exec_output += NeonXEqualRegOpExecute.subst(iop) 599 for type in types: 600 substDict = { "targs" : type, 601 "class_name" : Name } 602 exec_output += NeonXExecDeclare.subst(substDict) 603 604 def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, 605 readDest=False): 606 global header_output, exec_output 607 eWalkCode = simd64EnabledCheckCode + ''' 608 RegVect srcRegs; 609 BigRegVect destReg; 610 ''' 611 for reg in range(rCount): 612 eWalkCode += ''' 613 srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 614 ''' % { "reg" : reg } 615 if readDest: 616 eWalkCode += ''' 617 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 618 ''' % { "reg" : reg } 619 readDestCode = '' 620 if readDest: 621 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 622 eWalkCode += ''' 623 for (unsigned i = 0; i < eCount / 2; i++) { 624 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 625 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 626 BigElement destElem; 627 %(readDest)s 628 %(op)s 629 destReg.elements[i] = htog(destElem); 630 } 631 ''' % { "op" : op, "readDest" : readDestCode } 632 for reg in range(rCount): 633 eWalkCode += ''' 634 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 635 ''' % { "reg" : reg } 636 if rCount < 4: # zero upper half 637 for reg in range(rCount, 4): 638 eWalkCode += ''' 639 AA64FpDestP%(reg)d_uw = 0; 640 ''' % { "reg" : reg } 641 iop = InstObjParams(name, Name, 642 "DataX1RegOp", 643 { "code": eWalkCode, 644 "r_count": rCount, 645 "op_class": opClass }, []) 646 header_output += NeonX1RegOpDeclare.subst(iop) 647 exec_output += NeonXUnequalRegOpExecute.subst(iop) 648 for type in types: 649 substDict = { "targs" : type, 650 "class_name" : Name } 651 exec_output += NeonXExecDeclare.subst(substDict) 652 653 def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): 654 global header_output, exec_output 655 eWalkCode = simd64EnabledCheckCode + ''' 
656 RegVect destReg; 657 ''' 658 if readDest: 659 for reg in range(rCount): 660 eWalkCode += ''' 661 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 662 ''' % { "reg" : reg } 663 readDestCode = '' 664 if readDest: 665 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 666 eWalkCode += ''' 667 for (unsigned i = 0; i < eCount; i++) { 668 Element destElem; 669 %(readDest)s 670 %(op)s 671 destReg.elements[i] = htog(destElem); 672 } 673 ''' % { "op" : op, "readDest" : readDestCode } 674 for reg in range(rCount): 675 eWalkCode += ''' 676 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 677 ''' % { "reg" : reg } 678 if rCount < 4: # zero upper half 679 for reg in range(rCount, 4): 680 eWalkCode += ''' 681 AA64FpDestP%(reg)d_uw = 0; 682 ''' % { "reg" : reg } 683 iop = InstObjParams(name, Name, 684 "DataXImmOnlyOp", 685 { "code": eWalkCode, 686 "r_count": rCount, 687 "op_class": opClass }, []) 688 header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) 689 exec_output += NeonXEqualRegOpExecute.subst(iop) 690 for type in types: 691 substDict = { "targs" : type, 692 "class_name" : Name } 693 exec_output += NeonXExecDeclare.subst(substDict) 694 695 def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): 696 global header_output, exec_output 697 eWalkCode = simd64EnabledCheckCode + ''' 698 RegVect destReg; 699 for (unsigned i = 0; i < eCount; i++) { 700 destReg.elements[i] = htog((Element) %sOp1); 701 } 702 ''' % gprSpec 703 for reg in range(rCount): 704 eWalkCode += ''' 705 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 706 ''' % { "reg" : reg } 707 if rCount < 4: # zero upper half 708 for reg in range(rCount, 4): 709 eWalkCode += ''' 710 AA64FpDestP%(reg)d_uw = 0; 711 ''' % { "reg" : reg } 712 iop = InstObjParams(name, Name, 713 "DataX1RegOp", 714 { "code": eWalkCode, 715 "r_count": rCount, 716 "op_class": opClass }, []) 717 header_output += NeonX1RegOpDeclare.subst(iop) 718 exec_output += NeonXEqualRegOpExecute.subst(iop) 719 for type in types: 720 
substDict = { "targs" : type, 721 "class_name" : Name } 722 exec_output += NeonXExecDeclare.subst(substDict) 723 724 def extInstX(name, Name, opClass, types, rCount, op): 725 global header_output, exec_output 726 eWalkCode = simd64EnabledCheckCode + ''' 727 RegVect srcReg1, srcReg2, destReg; 728 ''' 729 for reg in range(rCount): 730 eWalkCode += ''' 731 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 732 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 733 ''' % { "reg" : reg } 734 eWalkCode += op 735 for reg in range(rCount): 736 eWalkCode += ''' 737 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 738 ''' % { "reg" : reg } 739 if rCount < 4: # zero upper half 740 for reg in range(rCount, 4): 741 eWalkCode += ''' 742 AA64FpDestP%(reg)d_uw = 0; 743 ''' % { "reg" : reg } 744 iop = InstObjParams(name, Name, 745 "DataX2RegImmOp", 746 { "code": eWalkCode, 747 "r_count": rCount, 748 "op_class": opClass }, []) 749 header_output += NeonX2RegImmOpDeclare.subst(iop) 750 exec_output += NeonXEqualRegOpExecute.subst(iop) 751 for type in types: 752 substDict = { "targs" : type, 753 "class_name" : Name } 754 exec_output += NeonXExecDeclare.subst(substDict) 755 756 def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): 757 global header_output, exec_output 758 eWalkCode = simd64EnabledCheckCode + ''' 759 RegVect destReg; 760 ''' 761 for reg in range(rCount): 762 eWalkCode += ''' 763 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 764 ''' % { "reg" : reg } 765 eWalkCode += ''' 766 destReg.elements[imm] = htog((Element) %sOp1); 767 ''' % gprSpec 768 for reg in range(rCount): 769 eWalkCode += ''' 770 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 771 ''' % { "reg" : reg } 772 iop = InstObjParams(name, Name, 773 "DataX1RegImmOp", 774 { "code": eWalkCode, 775 "r_count": rCount, 776 "op_class": opClass }, []) 777 header_output += NeonX1RegImmOpDeclare.subst(iop) 778 exec_output += NeonXEqualRegOpExecute.subst(iop) 779 for type in types: 780 substDict = 
{ "targs" : type, 781 "class_name" : Name } 782 exec_output += NeonXExecDeclare.subst(substDict) 783 784 def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, 785 signExt=False): 786 global header_output, exec_output 787 eWalkCode = simd64EnabledCheckCode + ''' 788 FullRegVect srcReg; 789 ''' 790 for reg in range(4): 791 eWalkCode += ''' 792 srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 793 ''' % { "reg" : reg } 794 if signExt: 795 eWalkCode += ''' 796 %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); 797 ''' % gprSpec 798 else: 799 eWalkCode += ''' 800 %sDest = srcReg.elements[imm]; 801 ''' % gprSpec 802 iop = InstObjParams(name, Name, 803 "DataX1RegImmOp", 804 { "code": eWalkCode, 805 "r_count": rCount, 806 "op_class": opClass }, []) 807 header_output += NeonX1RegImmOpDeclare.subst(iop) 808 exec_output += NeonXEqualRegOpExecute.subst(iop) 809 for type in types: 810 substDict = { "targs" : type, 811 "class_name" : Name } 812 exec_output += NeonXExecDeclare.subst(substDict) 813 814 def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): 815 global header_output, decoder_output, exec_output 816 code = simd64EnabledCheckCode + ''' 817 union 818 { 819 uint8_t bytes[64]; 820 FloatRegBits regs[16]; 821 } table; 822 823 union 824 { 825 uint8_t bytes[%(rCount)d * 4]; 826 FloatRegBits regs[%(rCount)d]; 827 } destReg, srcReg2; 828 829 const unsigned length = %(length)d; 830 const bool isTbl = %(isTbl)s; 831 ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } 832 for reg in range(rCount): 833 code += ''' 834 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 835 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 836 ''' % { "reg" : reg } 837 for reg in range(16): 838 if reg < length * 4: 839 code += ''' 840 table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); 841 ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } 842 else: 843 code += ''' 844 table.regs[%(reg)d] = 0; 845 ''' % { "reg" : reg } 846 code += ''' 847 for 
(unsigned i = 0; i < sizeof(destReg); i++) { 848 uint8_t index = srcReg2.bytes[i]; 849 if (index < 16 * length) { 850 destReg.bytes[i] = table.bytes[index]; 851 } else { 852 if (isTbl) 853 destReg.bytes[i] = 0; 854 // else destReg.bytes[i] unchanged 855 } 856 } 857 ''' 858 for reg in range(rCount): 859 code += ''' 860 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 861 ''' % { "reg" : reg } 862 if rCount < 4: # zero upper half 863 for reg in range(rCount, 4): 864 code += ''' 865 AA64FpDestP%(reg)d_uw = 0; 866 ''' % { "reg" : reg } 867 iop = InstObjParams(name, Name, 868 "DataX2RegOp", 869 { "code": code, 870 "r_count": rCount, 871 "op_class": opClass }, []) 872 header_output += NeonX2RegOpDeclare.subst(iop) 873 exec_output += NeonXEqualRegOpExecute.subst(iop) 874 for type in types: 875 substDict = { "targs" : type, 876 "class_name" : Name } 877 exec_output += NeonXExecDeclare.subst(substDict) 878 879 # ABS 880 absCode = ''' 881 if (srcElem1 < 0) { 882 destElem = -srcElem1; 883 } else { 884 destElem = srcElem1; 885 } 886 ''' 887 twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) 888 twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) 889 # ADD 890 addCode = "destElem = srcElem1 + srcElem2;" 891 threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) 892 threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) 893 # ADDHN, ADDHN2 894 addhnCode = ''' 895 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 896 (sizeof(Element) * 8); 897 ''' 898 threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, 899 addhnCode) 900 threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, 901 addhnCode, hi=True) 902 # ADDP (scalar) 903 twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, 904 addCode) 905 # ADDP (vector) 906 threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, 907 addCode, pairwise=True) 908 
# Advanced SIMD (AArch64) definitions: ADDP (Q form) through FADDP (scalar,
# D form).  Every helper call takes the assembler mnemonic, the C++ class
# name, the op class and the element types; most helpers then take the
# register count (2 in the "D" variants, 4 in the "Q" and scalar "Sc"
# variants) followed by the C++ snippet evaluated per element.

# ADDP, Q form (addCode comes from earlier in the file).
threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4,
                   addCode, pairwise=True)

# ADDV: every source element is accumulated into destElem.
# Note: SimdAddOp can be a bit optimistic here
addAcrossCode = "destElem += srcElem1;"
for _cls, _types, _regs in (("AddvDX", ("uint8_t", "uint16_t"), 2),
                            ("AddvQX", smallUnsignedTypes, 4)):
    twoRegAcrossInstX("addv", _cls, "SimdAddOp", _types, _regs,
                      addAcrossCode)

# AND
andCode = "destElem = srcElem1 & srcElem2;"
for _cls, _regs in (("AndDX", 2), ("AndQX", 4)):
    threeEqualRegInstX("and", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       andCode)

# BIC (immediate); the trailing True is forwarded positionally, unchanged.
bicImmCode = "destElem &= ~imm;"
for _cls, _regs in (("BicImmDX", 2), ("BicImmQX", 4)):
    oneRegImmInstX("bic", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   bicImmCode, True)

# BIC (register)
bicCode = "destElem = srcElem1 & ~srcElem2;"
for _cls, _regs in (("BicDX", 2), ("BicQX", 4)):
    threeEqualRegInstX("bic", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       bicCode)

# BIF, BIT and BSL all merge with the previous destination value, which is
# why they request readDest.
# BIF
bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
for _cls, _regs in (("BifDX", 2), ("BifQX", 4)):
    threeEqualRegInstX("bif", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       bifCode, readDest=True)

# BIT
bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
for _cls, _regs in (("BitDX", 2), ("BitQX", 4)):
    threeEqualRegInstX("bit", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       bitCode, readDest=True)

# BSL
bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
for _cls, _regs in (("BslDX", 2), ("BslQX", 4)):
    threeEqualRegInstX("bsl", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       bslCode, readDest=True)

# CLS: counts the bits below the top bit that repeat the sign, capped at
# (element width - 1).
clsCode = '''
        unsigned count = 0;
        if (srcElem1 < 0) {
            srcElem1 <<= 1;
            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        } else {
            srcElem1 <<= 1;
            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        }
        destElem = count;
'''
for _cls, _regs in (("ClsDX", 2), ("ClsQX", 4)):
    twoEqualRegInstX("cls", _cls, "SimdAluOp", smallSignedTypes, _regs,
                     clsCode)

# CLZ: shifts left until the signed element turns negative, counting the
# shifts (capped at the element width).
clzCode = '''
        unsigned count = 0;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
            count++;
            srcElem1 <<= 1;
        }
        destElem = count;
'''
for _cls, _regs in (("ClzDX", 2), ("ClzQX", 4)):
    twoEqualRegInstX("clz", _cls, "SimdAluOp", smallSignedTypes, _regs,
                     clzCode)

# Integer compares: the result element is all ones when the test holds and
# zero otherwise.
# CMEQ (register)
cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmeqDX", 2), ("CmeqQX", 4)):
    threeEqualRegInstX("cmeq", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmeqCode)

# CMEQ (zero)
cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmeqZeroDX", 2), ("CmeqZeroQX", 4)):
    twoEqualRegInstX("cmeq", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmeqZeroCode)

# CMGE (register)
cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgeDX", 2), ("CmgeQX", 4)):
    threeEqualRegInstX("cmge", _cls, "SimdCmpOp", signedTypes, _regs,
                       cmgeCode)

# CMGE (zero)
cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgeZeroDX", 2), ("CmgeZeroQX", 4)):
    twoEqualRegInstX("cmge", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmgeZeroCode)

# CMGT (register)
cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgtDX", 2), ("CmgtQX", 4)):
    threeEqualRegInstX("cmgt", _cls, "SimdCmpOp", signedTypes, _regs,
                       cmgtCode)

# CMGT (zero)
cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgtZeroDX", 2), ("CmgtZeroQX", 4)):
    twoEqualRegInstX("cmgt", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmgtZeroCode)

# CMHI (register): same snippet as CMGT, instantiated for unsigned elements.
for _cls, _regs in (("CmhiDX", 2), ("CmhiQX", 4)):
    threeEqualRegInstX("cmhi", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmgtCode)

# CMHS (register): same snippet as CMGE, instantiated for unsigned elements.
for _cls, _regs in (("CmhsDX", 2), ("CmhsQX", 4)):
    threeEqualRegInstX("cmhs", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmgeCode)

# CMLE (zero)
cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmleZeroDX", 2), ("CmleZeroQX", 4)):
    twoEqualRegInstX("cmle", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmleZeroCode)

# CMLT (zero)
cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmltZeroDX", 2), ("CmltZeroQX", 4)):
    twoEqualRegInstX("cmlt", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmltZeroCode)

# CMTST (register)
tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmtstDX", 2), ("CmtstQX", 4)):
    threeEqualRegInstX("cmtst", _cls, "SimdAluOp", unsignedTypes, _regs,
                       tstCode)

# CNT: population count of each element.
cntCode = '''
        unsigned count = 0;
        while (srcElem1 && count < sizeof(Element) * 8) {
            count += srcElem1 & 0x1;
            srcElem1 >>= 1;
        }
        destElem = count;
'''
for _cls, _regs in (("CntDX", 2), ("CntQX", 4)):
    twoEqualRegInstX("cnt", _cls, "SimdAluOp", ("uint8_t",), _regs, cntCode)

# DUP (element)
dupCode = "destElem = srcElem1;"
for _cls, _types, _regs in (("DupElemDX", smallUnsignedTypes, 2),
                            ("DupElemQX", unsignedTypes, 4)):
    twoEqualRegInstX("dup", _cls, "SimdMiscOp", _types, _regs, dupCode,
                     isDup=True, byElem=True)
twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4,
                 dupCode, isDup=True, byElem=True, scalar=True)

# DUP (general register)
for _cls, _types, _regs, _gpr in (
        ("DupGprWDX", smallUnsignedTypes, 2, 'W'),
        ("DupGprWQX", smallUnsignedTypes, 4, 'W'),
        ("DupGprXQX", ("uint64_t",), 4, 'X')):
    dupGprInstX("dup", _cls, "SimdMiscOp", _types, _regs, _gpr)

# EOR
eorCode = "destElem = srcElem1 ^ srcElem2;"
for _cls, _regs in (("EorDX", 2), ("EorQX", 4)):
    threeEqualRegInstX("eor", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       eorCode)

# EXT: element i comes from srcReg1 at i + imm while that index is in range
# and otherwise from srcReg2 at the wrapped index; an index that is still out
# of range after wrapping raises an UndefinedInstruction fault.
extCode = '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned index = i + imm;
            if (index < eCount) {
                destReg.elements[i] = srcReg1.elements[index];
            } else {
                index -= eCount;
                if (index >= eCount) {
                    fault = new UndefinedInstruction(machInst, false, mnemonic);
                } else {
                    destReg.elements[i] = srcReg2.elements[index];
                }
            }
        }
'''
# The mnemonic is spelled lower-case ("ext") like every other entry here.
for _cls, _regs in (("ExtDX", 2), ("ExtQX", 4)):
    extInstX("ext", _cls, "SimdMiscOp", ("uint8_t",), _regs, extCode)

# Common wrapper for FP snippets: copies FpscrExc into a local fpscr, assigns
# the substituted expression to destElem, then writes fpscr back.
# FABD
fpOp = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destElem = %s;
        FpscrExc = fpscr;
'''
fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
for _cls, _types, _regs in (("FabdDX", smallFloatTypes, 2),
                            ("FabdQX", floatTypes, 4)):
    threeEqualRegInstX("fabd", _cls, "SimdFloatAddOp", _types, _regs,
                       fabdCode)
threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4,
                   fabdCode, scalar=True)

# FABS: registered under its assembler mnemonic "fabs", matching the
# lower-case mnemonics used by the sibling entries.
fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
for _cls, _types, _regs in (("FabsDX", smallFloatTypes, 2),
                            ("FabsQX", floatTypes, 4)):
    twoEqualRegInstX("fabs", _cls, "SimdFloatAluOp", _types, _regs,
                     fabsCode)

# FACGE: fpCmpAbsOp keeps one %s open for the comparison name.
fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),"
                     " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
facgeCode = fpCmpAbsOp % "GE"
for _cls, _types, _regs in (("FacgeDX", smallFloatTypes, 2),
                            ("FacgeQX", floatTypes, 4)):
    threeEqualRegInstX("facge", _cls, "SimdFloatCmpOp", _types, _regs,
                       facgeCode)
threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgeCode, scalar=True)

# FACGT
facgtCode = fpCmpAbsOp % "GT"
for _cls, _types, _regs in (("FacgtDX", smallFloatTypes, 2),
                            ("FacgtQX", floatTypes, 4)):
    threeEqualRegInstX("facgt", _cls, "SimdFloatCmpOp", _types, _regs,
                       facgtCode)
threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgtCode, scalar=True)

# FADD: fpBinOp keeps one %s open for the fplib operation name.
fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
faddCode = fpBinOp % "Add"
for _cls, _types, _regs in (("FaddDX", smallFloatTypes, 2),
                            ("FaddQX", floatTypes, 4)):
    threeEqualRegInstX("fadd", _cls, "SimdFloatAddOp", _types, _regs,
                       faddCode)

# FADDP (scalar)
twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp",
                      ("uint32_t",), 2, faddCode)
# FADDP (scalar), 64-bit element form; faddCode and fpOp come from earlier
# in the file.
twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp",
                      ("uint64_t",), 4, faddCode)

# FADDP (vector)
for _cls, _types, _regs in (("FaddpDX", smallFloatTypes, 2),
                            ("FaddpQX", floatTypes, 4)):
    threeEqualRegInstX("faddp", _cls, "SimdFloatAddOp", _types, _regs,
                       faddCode, pairwise=True)

# FP compares produce -1 when the fplib comparison holds and 0 otherwise.
# fpCmpOp keeps one %s open for the comparison name (EQ / GE / GT).
# FCMEQ (register)
fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?"
                  " -1 : 0")
fcmeqCode = fpCmpOp % "EQ"
for _cls, _types, _regs in (("FcmeqDX", smallFloatTypes, 2),
                            ("FcmeqQX", floatTypes, 4)):
    threeEqualRegInstX("fcmeq", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmeqCode)
threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmeqCode, scalar=True)

# FCMEQ (zero): same comparison with a literal 0 as the second operand.
fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
fcmeqZeroCode = fpCmpZeroOp % "EQ"
for _cls, _types, _regs in (("FcmeqZeroDX", smallFloatTypes, 2),
                            ("FcmeqZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmeq", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmeqZeroCode)
twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmeqZeroCode, scalar=True)

# FCMGE (register)
fcmgeCode = fpCmpOp % "GE"
for _cls, _types, _regs in (("FcmgeDX", smallFloatTypes, 2),
                            ("FcmgeQX", floatTypes, 4)):
    threeEqualRegInstX("fcmge", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmgeCode)
threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgeCode, scalar=True)

# FCMGE (zero)
fcmgeZeroCode = fpCmpZeroOp % "GE"
for _cls, _types, _regs in (("FcmgeZeroDX", smallFloatTypes, 2),
                            ("FcmgeZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmge", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmgeZeroCode)
twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgeZeroCode, scalar=True)

# FCMGT (register)
# FCMGT (register) through FCVTZS (fixed-point, D/Q forms).  fpOp, fpCmpOp and
# fpCmpZeroOp are the snippet templates assigned earlier in the file.
fcmgtCode = fpCmpOp % "GT"
for _cls, _types, _regs in (("FcmgtDX", smallFloatTypes, 2),
                            ("FcmgtQX", floatTypes, 4)):
    threeEqualRegInstX("fcmgt", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmgtCode)
threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgtCode, scalar=True)

# FCMGT (zero)
fcmgtZeroCode = fpCmpZeroOp % "GT"
for _cls, _types, _regs in (("FcmgtZeroDX", smallFloatTypes, 2),
                            ("FcmgtZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmgt", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmgtZeroCode)
twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgtZeroCode, scalar=True)

# FCMLE (zero): expressed as "0 >= x" by swapping the compare operands.
fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?"
                         " -1 : 0")
fcmleZeroCode = fpCmpRevZeroOp % "GE"
for _cls, _types, _regs in (("FcmleZeroDX", smallFloatTypes, 2),
                            ("FcmleZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmle", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmleZeroCode)
twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmleZeroCode, scalar=True)

# FCMLT (zero): expressed as "0 > x".
fcmltZeroCode = fpCmpRevZeroOp % "GT"
for _cls, _types, _regs in (("FcmltZeroDX", smallFloatTypes, 2),
                            ("FcmltZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmlt", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmltZeroCode)
twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmltZeroCode, scalar=True)

# FP -> integer/fixed-point conversions.  fcvtCode leaves three %s slots that
# are filled, in order, with the second, third and fourth arguments of
# fplibFPToFixed: "0" or "imm", then "false"/"true" (the S-suffixed
# mnemonics pass "false", the U-suffixed ones "true"), then the rounding mode.
# FCVTAS
fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>("
                   "srcElem1, %s, %s, %s, fpscr)")
fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
for _cls, _types, _regs in (("FcvtasDX", smallFloatTypes, 2),
                            ("FcvtasQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtas", _cls, "SimdCvtOp", _types, _regs, fcvtasCode)
twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4,
                 fcvtasCode, scalar=True)

# FCVTAU
fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
for _cls, _types, _regs in (("FcvtauDX", smallFloatTypes, 2),
                            ("FcvtauQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtau", _cls, "SimdCvtOp", _types, _regs, fcvtauCode)
twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4,
                 fcvtauCode, scalar=True)

# FCVTL, FCVTL2: the hi=True variant is registered under its own "fcvtl2"
# mnemonic, matching how the other second-part forms in this file (raddhn2,
# rshrn2, saddl2, ...) are named.
fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"),
                fcvtlCode)
twoRegLongInstX("fcvtl2", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"),
                fcvtlCode, hi=True)

# FCVTMS
fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
for _cls, _types, _regs in (("FcvtmsDX", smallFloatTypes, 2),
                            ("FcvtmsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtms", _cls, "SimdCvtOp", _types, _regs, fcvtmsCode)
twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmsCode, scalar=True)

# FCVTMU
fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
for _cls, _types, _regs in (("FcvtmuDX", smallFloatTypes, 2),
                            ("FcvtmuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtmu", _cls, "SimdCvtOp", _types, _regs, fcvtmuCode)
twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmuCode, scalar=True)

# FCVTN, FCVTN2 (second-part form registered as "fcvtn2").
fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp",
                  ("uint16_t", "uint32_t"), fcvtnCode)
twoRegNarrowInstX("fcvtn2", "Fcvtn2X", "SimdCvtOp",
                  ("uint16_t", "uint32_t"), fcvtnCode, hi=True)

# FCVTNS
fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
for _cls, _types, _regs in (("FcvtnsDX", smallFloatTypes, 2),
                            ("FcvtnsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtns", _cls, "SimdCvtOp", _types, _regs, fcvtnsCode)
twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnsCode, scalar=True)

# FCVTNU
fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
for _cls, _types, _regs in (("FcvtnuDX", smallFloatTypes, 2),
                            ("FcvtnuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtnu", _cls, "SimdCvtOp", _types, _regs, fcvtnuCode)
twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnuCode, scalar=True)

# FCVTPS
fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
for _cls, _types, _regs in (("FcvtpsDX", smallFloatTypes, 2),
                            ("FcvtpsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtps", _cls, "SimdCvtOp", _types, _regs, fcvtpsCode)
twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpsCode, scalar=True)

# FCVTPU
fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
for _cls, _types, _regs in (("FcvtpuDX", smallFloatTypes, 2),
                            ("FcvtpuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtpu", _cls, "SimdCvtOp", _types, _regs, fcvtpuCode)
twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpuCode, scalar=True)

# FCVTXN, FCVTXN2 (second-part form registered as "fcvtxn2"); the narrowing
# conversion always uses FPRounding_ODD.
fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>("
                     "srcElem1, FPRounding_ODD, fpscr)")
twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode)
twoRegNarrowInstX("fcvtxn2", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, hi=True)
twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, scalar=True)

# FCVTZS (fixed-point): the first slot is "imm" instead of "0".
fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzsFixedDX", smallFloatTypes, 2),
                            ("FcvtzsFixedQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzs", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzsCode, hasImm=True)
# FCVTZS (fixed-point), scalar form; fcvtzsCode, fcvtCode, fpOp and fpBinOp
# are assigned earlier in the file.
twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsCode, hasImm=True, scalar=True)

# FCVTZS (integer)
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzsIntDX", smallFloatTypes, 2),
                            ("FcvtzsIntQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzs", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode, scalar=True)

# FCVTZU (fixed-point)
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzuFixedDX", smallFloatTypes, 2),
                            ("FcvtzuFixedQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True, scalar=True)

# FCVTZU (integer)
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzuIntDX", smallFloatTypes, 2),
                            ("FcvtzuIntQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode, scalar=True)

# FDIV
fdivCode = fpBinOp % "Div"
for _cls, _types, _regs in (("FdivDX", smallFloatTypes, 2),
                            ("FdivQX", floatTypes, 4)):
    threeEqualRegInstX("fdiv", _cls, "SimdFloatDivOp", _types, _regs,
                       fdivCode)

# FMAX
fmaxCode = fpBinOp % "Max"
for _cls, _types, _regs in (("FmaxDX", smallFloatTypes, 2),
                            ("FmaxQX", floatTypes, 4)):
    threeEqualRegInstX("fmax", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxCode)

# FMAXNM
fmaxnmCode = fpBinOp % "MaxNum"
for _cls, _types, _regs in (("FmaxnmDX", smallFloatTypes, 2),
                            ("FmaxnmQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnm", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxnmCode)

# FMAXNMP (scalar)
for _cls, _types, _regs in (("FmaxnmpScDX", ("uint32_t",), 2),
                            ("FmaxnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fmaxnmCode)

# FMAXNMP (vector)
for _cls, _types, _regs in (("FmaxnmpDX", smallFloatTypes, 2),
                            ("FmaxnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxnmCode, pairwise=True)

# FMAXNMV: the across-lanes template folds each source element into destElem.
# Note: SimdFloatCmpOp can be a bit optimistic here
fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fmaxnmAcrossCode)

# FMAXP (scalar)
for _cls, _types, _regs in (("FmaxpScDX", ("uint32_t",), 2),
                            ("FmaxpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fmaxCode)

# FMAXP (vector)
for _cls, _types, _regs in (("FmaxpDX", smallFloatTypes, 2),
                            ("FmaxpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxCode, pairwise=True)

# FMAXV
# Note: SimdFloatCmpOp can be a bit optimistic here
fmaxAcrossCode = fpAcrossOp % "Max"
twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fmaxAcrossCode)

# FMIN
fminCode = fpBinOp % "Min"
for _cls, _types, _regs in (("FminDX", smallFloatTypes, 2),
                            ("FminQX", floatTypes, 4)):
    threeEqualRegInstX("fmin", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminCode)

# FMINNM
fminnmCode = fpBinOp % "MinNum"
for _cls, _types, _regs in (("FminnmDX", smallFloatTypes, 2),
                            ("FminnmQX", floatTypes, 4)):
    threeEqualRegInstX("fminnm", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminnmCode)

# FMINNMP (scalar)
for _cls, _types, _regs in (("FminnmpScDX", ("uint32_t",), 2),
                            ("FminnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fminnmCode)

# FMINNMP (vector)
for _cls, _types, _regs in (("FminnmpDX", smallFloatTypes, 2),
                            ("FminnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fminnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminnmCode, pairwise=True)

# FMINNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminnmAcrossCode = fpAcrossOp % "MinNum"
twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fminnmAcrossCode)

# FMINP (scalar)
for _cls, _types, _regs in (("FminpScDX", ("uint32_t",), 2),
                            ("FminpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fminCode)

# FMINP (vector)
for _cls, _types, _regs in (("FminpDX", smallFloatTypes, 2),
                            ("FminpQX", floatTypes, 4)):
    threeEqualRegInstX("fminp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminCode, pairwise=True)

# FMINV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminAcrossCode = fpAcrossOp % "Min"
twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fminAcrossCode)

# FMLA (by element): accumulates into destElem, so the destination is read.
fmlaCode = fpOp % ("fplibMulAdd<Element>("
                   "destElem, srcElem1, srcElem2, fpscr)")
for _cls, _types, _regs in (("FmlaElemDX", smallFloatTypes, 2),
                            ("FmlaElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmla", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlaCode, readDest=True, byElem=True)
threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, readDest=True, byElem=True, scalar=True)

# FMLA (vector)
threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
                   2, fmlaCode, readDest=True)
# FMLA (vector), Q form; fmlaCode, fpOp and fpBinOp are assigned earlier in
# the file.
threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
                   fmlaCode, readDest=True)

# FMLS (by element): like FMLA but with the first multiplicand negated.
fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
                   " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
for _cls, _types, _regs in (("FmlsElemDX", smallFloatTypes, 2),
                            ("FmlsElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlsCode, readDest=True, byElem=True)
threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, readDest=True, byElem=True, scalar=True)

# FMLS (vector)
for _cls, _types, _regs in (("FmlsDX", smallFloatTypes, 2),
                            ("FmlsQX", floatTypes, 4)):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlsCode, readDest=True)

# FMOV: the immediate is copied into every element.
fmovCode = 'destElem = imm;'
for _cls, _types, _regs in (("FmovDX", smallFloatTypes, 2),
                            ("FmovQX", floatTypes, 4)):
    oneRegImmInstX("fmov", _cls, "SimdMiscOp", _types, _regs, fmovCode)

# FMUL (by element)
fmulCode = fpBinOp % "Mul"
for _cls, _types, _regs in (("FmulElemDX", smallFloatTypes, 2),
                            ("FmulElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True, scalar=True)

# FMUL (vector)
for _cls, _types, _regs in (("FmulDX", smallFloatTypes, 2),
                            ("FmulQX", floatTypes, 4)):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulCode)

# FMULX
fmulxCode = fpBinOp % "MulX"
for _cls, _types, _regs in (("FmulxDX", smallFloatTypes, 2),
                            ("FmulxQX", floatTypes, 4)):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulxCode)
threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode, scalar=True)
# FMULX (by element); fmulxCode, fpOp and fpBinOp are assigned earlier in the
# file.
for _cls, _types, _regs in (("FmulxElemDX", smallFloatTypes, 2),
                            ("FmulxElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True, scalar=True)

# FNEG: registered under its assembler mnemonic "fneg", so it reads like the
# other lower-case mnemonics and is not mistaken for the integer NEG entry.
fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
for _cls, _types, _regs in (("FnegDX", smallFloatTypes, 2),
                            ("FnegQX", floatTypes, 4)):
    twoEqualRegInstX("fneg", _cls, "SimdFloatAluOp", _types, _regs,
                     fnegCode)

# FRECPE
frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FrecpeDX", smallFloatTypes, 2),
                            ("FrecpeQX", floatTypes, 4)):
    twoEqualRegInstX("frecpe", _cls, "SimdFloatMultAccOp", _types, _regs,
                     frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
                 4, frecpeCode, scalar=True)

# FRECPS
frecpsCode = fpBinOp % "RecipStepFused"
for _cls, _types, _regs in (("FrecpsDX", smallFloatTypes, 2),
                            ("FrecpsQX", floatTypes, 4)):
    threeEqualRegInstX("frecps", _cls, "SimdFloatMultAccOp", _types, _regs,
                       frecpsCode)
threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode, scalar=True)

# FRECPX: only a scalar form is generated here.
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpxCode, scalar=True)

# FRINT*: frintCode leaves two %s slots that are filled, in order, with the
# second and third arguments of fplibRoundInt -- the rounding mode and then a
# "true"/"false" flag (only FRINTX passes "true").
# FRINTA
frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
for _cls, _types, _regs in (("FrintaDX", smallFloatTypes, 2),
                            ("FrintaQX", floatTypes, 4)):
    twoEqualRegInstX("frinta", _cls, "SimdCvtOp", _types, _regs, frintaCode)

# FRINTI
frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
# FRINTI instruction forms; frintiCode, frintCode, fpOp and fpBinOp are
# assigned earlier in the file.
for _cls, _types, _regs in (("FrintiDX", smallFloatTypes, 2),
                            ("FrintiQX", floatTypes, 4)):
    twoEqualRegInstX("frinti", _cls, "SimdCvtOp", _types, _regs, frintiCode)

# FRINTM
frintmCode = frintCode % ("FPRounding_NEGINF", "false")
for _cls, _types, _regs in (("FrintmDX", smallFloatTypes, 2),
                            ("FrintmQX", floatTypes, 4)):
    twoEqualRegInstX("frintm", _cls, "SimdCvtOp", _types, _regs, frintmCode)

# FRINTN
frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
for _cls, _types, _regs in (("FrintnDX", smallFloatTypes, 2),
                            ("FrintnQX", floatTypes, 4)):
    twoEqualRegInstX("frintn", _cls, "SimdCvtOp", _types, _regs, frintnCode)

# FRINTP
frintpCode = frintCode % ("FPRounding_POSINF", "false")
for _cls, _types, _regs in (("FrintpDX", smallFloatTypes, 2),
                            ("FrintpQX", floatTypes, 4)):
    twoEqualRegInstX("frintp", _cls, "SimdCvtOp", _types, _regs, frintpCode)

# FRINTX: the only FRINT variant that passes "true" as the second slot.
frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
for _cls, _types, _regs in (("FrintxDX", smallFloatTypes, 2),
                            ("FrintxQX", floatTypes, 4)):
    twoEqualRegInstX("frintx", _cls, "SimdCvtOp", _types, _regs, frintxCode)

# FRINTZ
frintzCode = frintCode % ("FPRounding_ZERO", "false")
for _cls, _types, _regs in (("FrintzDX", smallFloatTypes, 2),
                            ("FrintzQX", floatTypes, 4)):
    twoEqualRegInstX("frintz", _cls, "SimdCvtOp", _types, _regs, frintzCode)

# FRSQRTE
frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FrsqrteDX", smallFloatTypes, 2),
                            ("FrsqrteQX", floatTypes, 4)):
    twoEqualRegInstX("frsqrte", _cls, "SimdFloatSqrtOp", _types, _regs,
                     frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode, scalar=True)

# FRSQRTS
frsqrtsCode = fpBinOp % "RSqrtStepFused"
for _cls, _types, _regs in (("FrsqrtsDX", smallFloatTypes, 2),
                            ("FrsqrtsQX", floatTypes, 4)):
    threeEqualRegInstX("frsqrts", _cls, "SimdFloatMiscOp", _types, _regs,
                       frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode, scalar=True)

# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FsqrtDX", smallFloatTypes, 2),
                            ("FsqrtQX", floatTypes, 4)):
    twoEqualRegInstX("fsqrt", _cls, "SimdFloatSqrtOp", _types, _regs,
                     fsqrtCode)

# FSUB
fsubCode = fpBinOp % "Sub"
for _cls, _types, _regs in (("FsubDX", smallFloatTypes, 2),
                            ("FsubQX", floatTypes, 4)):
    threeEqualRegInstX("fsub", _cls, "SimdFloatAddOp", _types, _regs,
                       fsubCode)

# INS (element)
insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)

# INS (general register)
for _cls, _types, _gpr in (("InsGprWX", smallUnsignedTypes, 'W'),
                           ("InsGprXX", unsignedTypes, 'X')):
    insFromGprInstX("ins", _cls, "SimdMiscOp", _types, 4, _gpr)

# MLA (by element): accumulates into destElem, so the destination is read.
mlaCode = "destElem += srcElem1 * srcElem2;"
for _cls, _regs in (("MlaElemDX", 2), ("MlaElemQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regs, mlaCode,
                       readDest=True, byElem=True)

# MLA (vector)
for _cls, _regs in (("MlaDX", 2), ("MlaQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regs, mlaCode, readDest=True)

# MLS (by element)
mlsCode = "destElem -= srcElem1 * srcElem2;"
for _cls, _regs in (("MlsElemDX", 2), ("MlsElemQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regs, mlsCode,
                       readDest=True, byElem=True)

# MLS (vector)
for _cls, _regs in (("MlsDX", 2), ("MlsQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regs, mlsCode, readDest=True)

# MOV (element) -> alias to INS (element)
# MOV (from general) -> alias to INS (general register)
# MOV (scalar) -> alias to DUP (element)
# MOV (to general) -> alias to UMOV
# MOV (vector) -> alias to ORR (register)

# MOVI
movImmCode = "destElem = imm;"
for _cls, _regs in (("MoviDX", 2), ("MoviQX", 4)):
    oneRegImmInstX("movi", _cls, "SimdMiscOp", ("uint64_t",), _regs,
                   movImmCode)

# MUL (by element)
mulCode = "destElem = srcElem1 * srcElem2;"
for _cls, _regs in (("MulElemDX", 2), ("MulElemQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp",
                       ("uint16_t", "uint32_t"), _regs, mulCode,
                       byElem=True)

# MUL (vector)
for _cls, _regs in (("MulDX", 2), ("MulQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp", smallUnsignedTypes, _regs,
                       mulCode)

# MVN
mvnCode = "destElem = ~srcElem1;"
for _cls, _regs in (("MvnDX", 2), ("MvnQX", 4)):
    twoEqualRegInstX("mvn", _cls, "SimdAluOp", ("uint64_t",), _regs,
                     mvnCode)

# MVNI
mvniCode = "destElem = ~imm;"
for _cls, _regs in (("MvniDX", 2), ("MvniQX", 4)):
    oneRegImmInstX("mvni", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   mvniCode)

# NEG
negCode = "destElem = -srcElem1;"
for _cls, _regs in (("NegDX", 2), ("NegQX", 4)):
    twoEqualRegInstX("neg", _cls, "SimdAluOp", signedTypes, _regs, negCode)

# NOT -> alias to MVN

# ORN
ornCode = "destElem = srcElem1 | ~srcElem2;"
for _cls, _regs in (("OrnDX", 2), ("OrnQX", 4)):
    threeEqualRegInstX("orn", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       ornCode)

# ORR (immediate)
# ORR (immediate) snippet and forms; the trailing True is forwarded
# positionally, unchanged.
orrImmCode = "destElem |= imm;"
for _cls, _regs in (("OrrImmDX", 2), ("OrrImmQX", 4)):
    oneRegImmInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   orrImmCode, True)

# ORR (register)
orrCode = "destElem = srcElem1 | srcElem2;"
for _cls, _regs in (("OrrDX", 2), ("OrrQX", 4)):
    threeEqualRegInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       orrCode)

# PMUL: carry-less multiply -- XORs srcElem1 shifted by j for every set bit j
# of srcElem2.
pmulCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= srcElem1 << j;
        }
'''
for _cls, _regs in (("PmulDX", 2), ("PmulQX", 4)):
    threeEqualRegInstX("pmul", _cls, "SimdMultOp", ("uint8_t",), _regs,
                       pmulCode)

# PMULL, PMULL2: same as PMUL but widened to BigElement before shifting.  The
# hi=True variant is registered under its own "pmull2" mnemonic, matching the
# other second-part forms in this file (raddhn2, rshrn2, saddl2, ...).
# Note: 64-bit PMULL is not available (Crypto. Extension)
pmullCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= (BigElement)srcElem1 << j;
        }
'''
threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
threeRegLongInstX("pmull2", "Pmull2X", "SimdMultOp", ("uint8_t",),
                  pmullCode, hi=True)

# RADDHN, RADDHN2: add in BigElement precision, add half of the narrow
# element's range as rounding, then keep the upper half.
raddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode)
threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode, hi=True)

# RBIT: mirrors the bit order within each element.
rbitCode = '''
        destElem = 0;
        Element temp = srcElem1;
        for (int i = 0; i < 8 * sizeof(Element); i++) {
            destElem = destElem | ((temp & 0x1) <<
                                   (8 * sizeof(Element) - 1 - i));
            temp >>= 1;
        }
'''
twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
# Instruction definitions: RBIT (QX form), REV16/REV32/REV64, RSHRN, RSUBHN,
# SABA, SABAL, SABD, SABDL, SADALP, SADDL, SADDLP, SADDLV, SADDW and the
# snippet template for SCVTF (fixed-point).
#
# Each *Code value is a C++ snippet that is handed to one of the
# instruction-generator helpers.  For threeEqualRegInstX the arguments are
# (mnemonic, class name, op class, element types, rCount, snippet, readDest,
# ...); rCount is the number of register chunks the generated code reads
# (2 for the DX classes, 4 for the QX classes).
# NOTE(review): the remaining helpers used here are not visible from this
# section; a bare positional True after the snippet is presumably readDest
# for them as well -- confirm against their definitions.

# rbitCode is assigned just before this point, together with the DX form.
twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
# REV16
# The REVn snippets copy the element and redirect the destination index j to
# i XOR (groupSize - 1), where groupSize is the number of elements in one
# (1 << n)-byte group.
rev16Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 1) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
'''
twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
                 rev16Code)
twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
                 rev16Code)
# REV32
rev32Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 2) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
'''
twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 2, rev32Code)
twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 4, rev32Code)
# REV64
rev64Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 3) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
'''
twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
                 rev64Code)
twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
                 rev64Code)
# RSHRN, RSHRN2
# Rounding right shift: rBit is the last bit shifted out and is added back.
# The shift is done in two steps so that a shift by the full width of
# srcElem1 is never issued as a single shift.
rshrnCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
'''
twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True)
twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True, hi=True)
# RSUBHN, RSUBHN2
# Subtract, then add a rounding constant of half an Element's range before
# dropping the low sizeof(Element) * 8 bits of the difference.
rsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
                    rsubhnCode)
threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
                    rsubhnCode, hi=True)
# SABA
# Absolute difference accumulated into the destination (readDest=True).
abaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
'''
threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
                   abaCode, True)
# SABAL, SABAL2
abalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True)
threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True, hi=True)
# SABD
abdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
'''
threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
                   abdCode)
threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
                   abdCode)
# SABDL, SABDL2
abdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
# NOTE(review): abdlCode overwrites destElem, yet these calls pass True in
# the same position as the accumulating SABAL calls and use SimdAddAccOp --
# confirm whether that is intended.
threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True)
threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True, hi=True)
# SADALP
adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
                    adalpCode, True)
twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
                    adalpCode, True)
# SADDL, SADDL2
# addlwCode is shared by SADDL, SADDLP and SADDW below.
addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SADDLP
twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
                    addlwCode)
twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
                    addlwCode)
# SADDLV
# Note: SimdAddOp can be a bit optimistic here
addAcrossLongCode = "destElem += (BigElement)srcElem1;"
twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
                  2, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
                  4, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# SADDW, SADDW2
threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SCVTF (fixed-point)
# The %d left in the template is filled in per call with the width of the
# signed integer type the source element is cast to (32 or 64).
scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
                         " false, FPCRRounding(fpscr), fpscr)")
# Instruction definitions: SCVTF (fixed-point and integer), SHADD, SHL, SHLL,
# SHRN, SHSUB, SLI, SMAX/SMAXP/SMAXV, SMIN/SMINP/SMINV, SMLAL, SMLSL, SMOV,
# SMULL, SQABS, SQADD, SQDMLAL, SQDMLSL and the by-element DX/QX forms of
# SQDMULH.
#
# Each *Code value is a C++ snippet that is handed to one of the
# instruction-generator helpers.  For threeEqualRegInstX the arguments are
# (mnemonic, class name, op class, element types, rCount, snippet, readDest,
# pairwise, scalar, byElem); rCount is the number of register chunks the
# generated code reads (2 for the DX classes, 4 for the QX classes).
# NOTE(review): the remaining helpers used here are not visible from this
# section; a bare positional True after the snippet is presumably readDest
# for them as well -- confirm against their definitions.

# SCVTF (fixed-point), using the scvtfFixedCode template assigned just
# before this point; "% 32" / "% 64" select the int32_t / int64_t cast.
twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
                 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True, scalar=True)
# SCVTF (integer)
# Same conversion call as the fixed-point form with the fraction-bits
# argument hard-wired to 0 instead of imm.
scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
                       " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64)
twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64, scalar=True)
# SHADD
haddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
                   haddCode)
threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
                   haddCode)
# SHL
# A shift by the full element width or more is split into two shifts so that
# no single shift is by the whole width.
shlCode = '''
        if (imm >= sizeof(Element) * 8)
            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
        else
            destElem = srcElem1 << imm;
'''
twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
                 hasImm=True)
twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
                 hasImm=True)
# SHLL, SHLL2
shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
                hi=True)
# SHRN, SHRN2
shrnCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
'''
twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True)
twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True, hi=True)
# SHSUB
hsubCode = '''
        Element borrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
                   hsubCode)
# SLI
# Shift left and insert: the low imm bits of the destination are kept, the
# rest come from the shifted source.
sliCode = '''
        if (imm >= sizeof(Element) * 8)
            destElem = destElem;
        else
            destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
                 True, hasImm=True)
twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
                 True, hasImm=True)
# SMAX
maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode)
threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode)
# SMAXP
threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode, pairwise=True)
threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode, pairwise=True)
# SMAXV
# The first element (i == 0) seeds destElem; later ones replace it only when
# they are larger.
maxAcrossCode = '''
        if (i == 0 || srcElem1 > destElem)
            destElem = srcElem1;
'''
twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, maxAcrossCode)
twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
                  maxAcrossCode)
# SMIN
minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode)
threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode)
# SMINP
threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode, pairwise=True)
threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode, pairwise=True)
# SMINV
# The first element (i == 0) seeds destElem; later ones replace it only when
# they are smaller.
minAcrossCode = '''
        if (i == 0 || srcElem1 < destElem)
            destElem = srcElem1;
'''
twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, minAcrossCode)
twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
                  minAcrossCode)

split('exec')

# SMLAL, SMLAL2 (by element)
mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True)
threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True,
                  hi=True)
# SMLAL, SMLAL2 (vector)
threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True)
threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True, hi=True)
# SMLSL, SMLSL2 (by element)
# NOTE(review): the by-element SMLAL forms above restrict the types to
# ("int16_t", "int32_t") while the by-element SMLSL and SMULL forms use
# smallSignedTypes -- confirm whether the difference is intended.
mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, byElem=True)
threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
                  smallSignedTypes, mlslCode, True, byElem=True, hi=True)
# SMLSL, SMLSL2 (vector)
threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True)
threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, hi=True)
# SMOV
insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
              'W', True)
insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
              True)
# SMULL, SMULL2 (by element)
mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True)
threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True, hi=True)
# SMULL, SMULL2 (vector)
threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
                  mullCode)
threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
                  mullCode, hi=True)
# SQABS
# The most negative value has no positive counterpart: it is replaced by its
# bitwise complement (the most positive value) and the QC flag is set.
sqabsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
            fpscr.qc = 1;
            destElem = ~srcElem1;
        } else if (srcElem1 < 0) {
            destElem = -srcElem1;
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
                 sqabsCode, scalar=True)
# SQADD
# Overflow is detected from the signs: both sources share a sign that the
# wrapped sum does not.  The result then saturates towards the sources' sign
# and QC is set.
sqaddCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
                   sqaddCode, scalar=True)
# SQDMLAL, SQDMLAL2 (by element)
# Two saturation steps: first the doubled product (midElem), then the
# accumulation into destElem; either one sets QC.
# NOTE(review): besides maxNeg * maxNeg, the snippet also saturates midElem
# (and sets QC) for the halfNeg/maxNeg operand pairs, although
# 2 * halfNeg * maxNeg is 2^(2N-2) for N-bit elements and so appears to fit
# if BigElement is twice as wide as Element -- confirm against the
# architecture's pseudocode for this instruction.
qdmlalCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        Element halfNeg = maxNeg / 2;
        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem += midElem;
        bool negDest = ltz(destElem);
        bool negMid = ltz(midElem);
        if (negPreDest == negMid && negMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  scalar=True)
# SQDMLAL, SQDMLAL2 (vector)
threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True)
threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
# SQDMLSL, SQDMLSL2 (by element)
# Same structure as the SQDMLAL snippet with the product subtracted; the
# overflow test therefore uses the sign of -midElem (posMid).
# NOTE(review): the halfNeg/maxNeg question raised for SQDMLAL applies here
# as well.
qdmlslCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        Element halfNeg = maxNeg / 2;
        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem -= midElem;
        bool negDest = ltz(destElem);
        bool posMid = ltz((BigElement)-midElem);
        if (negPreDest == posMid && posMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  scalar=True)
# SQDMLSL, SQDMLSL2 (vector)
threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True)
threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
# SQDMULH (by element)
# High half of the doubled product; only maxNeg * maxNeg is special-cased,
# yielding ~maxNeg (the most positive value) with QC set.
sqdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
                   (sizeof(Element) * 8);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)((Element)1 <<
                    (sizeof(Element) * 8 - 1))) {
            destElem = ~srcElem1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp", 2140 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, 2141 scalar=True) 2142 # SQDMULH (vector) 2143 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", 2144 ("int16_t", "int32_t"), 2, sqdmulhCode) 2145 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", 2146 ("int16_t", "int32_t"), 4, sqdmulhCode) 2147 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", 2148 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) 2149 # SQDMULL, SQDMULL2 (by element) 2150 qdmullCode = ''' 2151 FPSCR fpscr = (FPSCR) FpscrQc; 2152 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2153 if (srcElem1 == srcElem2 && 2154 srcElem1 == (Element)((Element)1 << 2155 (Element)(sizeof(Element) * 8 - 1))) { 2156 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); 2157 fpscr.qc = 1; 2158 } 2159 FpscrQc = fpscr; 2160 ''' 2161 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", 2162 ("int16_t", "int32_t"), qdmullCode, True, byElem=True) 2163 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", 2164 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2165 hi=True) 2166 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", 2167 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2168 scalar=True) 2169 # SQDMULL, SQDMULL2 (vector) 2170 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", 2171 ("int16_t", "int32_t"), qdmullCode, True) 2172 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", 2173 ("int16_t", "int32_t"), qdmullCode, True, hi=True) 2174 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", 2175 ("int16_t", "int32_t"), qdmullCode, True, scalar=True) 2176 # SQNEG 2177 sqnegCode = ''' 2178 FPSCR fpscr = (FPSCR) FpscrQc; 2179 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2180 fpscr.qc = 1; 2181 destElem = ~srcElem1; 2182 } else { 2183 destElem = -srcElem1; 2184 } 2185 FpscrQc = fpscr; 2186 ''' 2187 twoEqualRegInstX("sqneg", "SqnegDX", 
"SimdAluOp", smallSignedTypes, 2, 2188 sqnegCode) 2189 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, 2190 sqnegCode) 2191 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, 2192 sqnegCode, scalar=True) 2193 # SQRDMULH (by element) 2194 sqrdmulhCode = ''' 2195 FPSCR fpscr = (FPSCR) FpscrQc; 2196 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + 2197 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2198 (sizeof(Element) * 8); 2199 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); 2200 Element halfNeg = maxNeg / 2; 2201 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2202 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2203 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2204 if (destElem < 0) { 2205 destElem = mask(sizeof(Element) * 8 - 1); 2206 } else { 2207 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2208 } 2209 fpscr.qc = 1; 2210 } 2211 FpscrQc = fpscr; 2212 ''' 2213 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", 2214 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) 2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", 2216 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) 2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", 2218 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, 2219 scalar=True) 2220 # SQRDMULH (vector) 2221 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", 2222 ("int16_t", "int32_t"), 2, sqrdmulhCode) 2223 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", 2224 ("int16_t", "int32_t"), 4, sqrdmulhCode) 2225 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", 2226 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) 2227 # SQRSHL 2228 sqrshlCode = ''' 2229 int16_t shiftAmt = (int8_t)srcElem2; 2230 FPSCR fpscr = (FPSCR) FpscrQc; 2231 if (shiftAmt < 0) { 2232 shiftAmt = -shiftAmt; 2233 Element rBit = 0; 2234 if (shiftAmt <= sizeof(Element) * 8) 2235 rBit = bits(srcElem1, shiftAmt - 1); 2236 if (shiftAmt > 
sizeof(Element) * 8 && srcElem1 < 0) 2237 rBit = 1; 2238 if (shiftAmt >= sizeof(Element) * 8) { 2239 shiftAmt = sizeof(Element) * 8 - 1; 2240 destElem = 0; 2241 } else { 2242 destElem = (srcElem1 >> shiftAmt); 2243 } 2244 // Make sure the right shift sign extended when it should. 2245 if (srcElem1 < 0 && destElem >= 0) { 2246 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2247 1 - shiftAmt)); 2248 } 2249 destElem += rBit; 2250 } else if (shiftAmt > 0) { 2251 bool sat = false; 2252 if (shiftAmt >= sizeof(Element) * 8) { 2253 if (srcElem1 != 0) 2254 sat = true; 2255 else 2256 destElem = 0; 2257 } else { 2258 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2259 sizeof(Element) * 8 - 1 - shiftAmt) != 2260 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2261 sat = true; 2262 } else { 2263 destElem = srcElem1 << shiftAmt; 2264 } 2265 } 2266 if (sat) { 2267 fpscr.qc = 1; 2268 destElem = mask(sizeof(Element) * 8 - 1); 2269 if (srcElem1 < 0) 2270 destElem = ~destElem; 2271 } 2272 } else { 2273 destElem = srcElem1; 2274 } 2275 FpscrQc = fpscr; 2276 ''' 2277 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, 2278 sqrshlCode) 2279 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, 2280 sqrshlCode) 2281 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, 2282 sqrshlCode, scalar=True) 2283 # SQRSHRN, SQRSHRN2 2284 sqrshrnCode = ''' 2285 FPSCR fpscr = (FPSCR) FpscrQc; 2286 if (imm > sizeof(srcElem1) * 8) { 2287 if (srcElem1 != 0 && srcElem1 != -1) 2288 fpscr.qc = 1; 2289 destElem = 0; 2290 } else if (imm) { 2291 BigElement mid = (srcElem1 >> (imm - 1)); 2292 uint64_t rBit = mid & 0x1; 2293 mid >>= 1; 2294 mid |= -(mid & ((BigElement)1 << 2295 (sizeof(BigElement) * 8 - 1 - imm))); 2296 mid += rBit; 2297 if (mid != (Element)mid) { 2298 destElem = mask(sizeof(Element) * 8 - 1); 2299 if (srcElem1 < 0) 2300 destElem = ~destElem; 2301 fpscr.qc = 1; 2302 } else { 2303 destElem = mid; 2304 } 2305 } else { 2306 if 
(srcElem1 != (Element)srcElem1) { 2307 destElem = mask(sizeof(Element) * 8 - 1); 2308 if (srcElem1 < 0) 2309 destElem = ~destElem; 2310 fpscr.qc = 1; 2311 } else { 2312 destElem = srcElem1; 2313 } 2314 } 2315 FpscrQc = fpscr; 2316 ''' 2317 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, 2318 sqrshrnCode, hasImm=True) 2319 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, 2320 sqrshrnCode, hasImm=True, hi=True) 2321 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, 2322 sqrshrnCode, hasImm=True, scalar=True) 2323 # SQRSHRUN, SQRSHRUN2 2324 sqrshrunCode = ''' 2325 FPSCR fpscr = (FPSCR) FpscrQc; 2326 if (imm > sizeof(srcElem1) * 8) { 2327 if (srcElem1 != 0) 2328 fpscr.qc = 1; 2329 destElem = 0; 2330 } else if (imm) { 2331 BigElement mid = (srcElem1 >> (imm - 1)); 2332 uint64_t rBit = mid & 0x1; 2333 mid >>= 1; 2334 mid |= -(mid & ((BigElement)1 << 2335 (sizeof(BigElement) * 8 - 1 - imm))); 2336 mid += rBit; 2337 if (bits(mid, sizeof(BigElement) * 8 - 1, 2338 sizeof(Element) * 8) != 0) { 2339 if (srcElem1 < 0) { 2340 destElem = 0; 2341 } else { 2342 destElem = mask(sizeof(Element) * 8); 2343 } 2344 fpscr.qc = 1; 2345 } else { 2346 destElem = mid; 2347 } 2348 } else { 2349 if (srcElem1 < 0) { 2350 fpscr.qc = 1; 2351 destElem = 0; 2352 } else { 2353 destElem = srcElem1; 2354 } 2355 } 2356 FpscrQc = fpscr; 2357 ''' 2358 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, 2359 sqrshrunCode, hasImm=True) 2360 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", 2361 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) 2362 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", 2363 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) 2364 # SQSHL (immediate) 2365 sqshlImmCode = ''' 2366 FPSCR fpscr = (FPSCR) FpscrQc; 2367 if (imm >= sizeof(Element) * 8) { 2368 if (srcElem1 != 0) { 2369 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2370 if (srcElem1 > 
0) 2371 destElem = ~destElem; 2372 fpscr.qc = 1; 2373 } else { 2374 destElem = 0; 2375 } 2376 } else if (imm) { 2377 destElem = (srcElem1 << imm); 2378 uint64_t topBits = bits((uint64_t)srcElem1, 2379 sizeof(Element) * 8 - 1, 2380 sizeof(Element) * 8 - 1 - imm); 2381 if (topBits != 0 && topBits != mask(imm + 1)) { 2382 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2383 if (srcElem1 > 0) 2384 destElem = ~destElem; 2385 fpscr.qc = 1; 2386 } 2387 } else { 2388 destElem = srcElem1; 2389 } 2390 FpscrQc = fpscr; 2391 ''' 2392 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, 2393 sqshlImmCode, hasImm=True) 2394 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, 2395 sqshlImmCode, hasImm=True) 2396 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, 2397 sqshlImmCode, hasImm=True, scalar=True) 2398 # SQSHL (register) 2399 sqshlCode = ''' 2400 int16_t shiftAmt = (int8_t)srcElem2; 2401 FPSCR fpscr = (FPSCR) FpscrQc; 2402 if (shiftAmt < 0) { 2403 shiftAmt = -shiftAmt; 2404 if (shiftAmt >= sizeof(Element) * 8) { 2405 shiftAmt = sizeof(Element) * 8 - 1; 2406 destElem = 0; 2407 } else { 2408 destElem = (srcElem1 >> shiftAmt); 2409 } 2410 // Make sure the right shift sign extended when it should. 2411 if (srcElem1 < 0 && destElem >= 0) { 2412 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2413 1 - shiftAmt)); 2414 } 2415 } else if (shiftAmt > 0) { 2416 bool sat = false; 2417 if (shiftAmt >= sizeof(Element) * 8) { 2418 if (srcElem1 != 0) 2419 sat = true; 2420 else 2421 destElem = 0; 2422 } else { 2423 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2424 sizeof(Element) * 8 - 1 - shiftAmt) != 2425 ((srcElem1 < 0) ? 
mask(shiftAmt + 1) : 0)) { 2426 sat = true; 2427 } else { 2428 destElem = srcElem1 << shiftAmt; 2429 } 2430 } 2431 if (sat) { 2432 fpscr.qc = 1; 2433 destElem = mask(sizeof(Element) * 8 - 1); 2434 if (srcElem1 < 0) 2435 destElem = ~destElem; 2436 } 2437 } else { 2438 destElem = srcElem1; 2439 } 2440 FpscrQc = fpscr; 2441 ''' 2442 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, 2443 sqshlCode) 2444 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, 2445 sqshlCode) 2446 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, 2447 sqshlCode, scalar=True) 2448 # SQSHLU 2449 sqshluCode = ''' 2450 FPSCR fpscr = (FPSCR) FpscrQc; 2451 if (imm >= sizeof(Element) * 8) { 2452 if (srcElem1 < 0) { 2453 destElem = 0; 2454 fpscr.qc = 1; 2455 } else if (srcElem1 > 0) { 2456 destElem = mask(sizeof(Element) * 8); 2457 fpscr.qc = 1; 2458 } else { 2459 destElem = 0; 2460 } 2461 } else if (imm) { 2462 destElem = (srcElem1 << imm); 2463 uint64_t topBits = bits((uint64_t)srcElem1, 2464 sizeof(Element) * 8 - 1, 2465 sizeof(Element) * 8 - imm); 2466 if (srcElem1 < 0) { 2467 destElem = 0; 2468 fpscr.qc = 1; 2469 } else if (topBits != 0) { 2470 destElem = mask(sizeof(Element) * 8); 2471 fpscr.qc = 1; 2472 } 2473 } else { 2474 if (srcElem1 < 0) { 2475 fpscr.qc = 1; 2476 destElem = 0; 2477 } else { 2478 destElem = srcElem1; 2479 } 2480 } 2481 FpscrQc = fpscr; 2482 ''' 2483 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, 2484 sqshluCode, hasImm=True) 2485 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, 2486 sqshluCode, hasImm=True) 2487 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, 2488 sqshluCode, hasImm=True, scalar=True) 2489 # SQSHRN, SQSHRN2 2490 sqshrnCode = ''' 2491 FPSCR fpscr = (FPSCR) FpscrQc; 2492 if (imm > sizeof(srcElem1) * 8) { 2493 if (srcElem1 != 0 && srcElem1 != -1) 2494 fpscr.qc = 1; 2495 destElem = 0; 2496 } else if (imm) { 2497 BigElement mid = 
((srcElem1 >> (imm - 1)) >> 1); 2498 mid |= -(mid & ((BigElement)1 << 2499 (sizeof(BigElement) * 8 - 1 - imm))); 2500 if (mid != (Element)mid) { 2501 destElem = mask(sizeof(Element) * 8 - 1); 2502 if (srcElem1 < 0) 2503 destElem = ~destElem; 2504 fpscr.qc = 1; 2505 } else { 2506 destElem = mid; 2507 } 2508 } else { 2509 destElem = srcElem1; 2510 } 2511 FpscrQc = fpscr; 2512 ''' 2513 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, 2514 sqshrnCode, hasImm=True) 2515 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, 2516 sqshrnCode, hasImm=True, hi=True) 2517 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, 2518 sqshrnCode, hasImm=True, scalar=True) 2519 # SQSHRUN, SQSHRUN2 2520 sqshrunCode = ''' 2521 FPSCR fpscr = (FPSCR) FpscrQc; 2522 if (imm > sizeof(srcElem1) * 8) { 2523 if (srcElem1 != 0) 2524 fpscr.qc = 1; 2525 destElem = 0; 2526 } else if (imm) { 2527 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2528 if (bits(mid, sizeof(BigElement) * 8 - 1, 2529 sizeof(Element) * 8) != 0) { 2530 if (srcElem1 < 0) { 2531 destElem = 0; 2532 } else { 2533 destElem = mask(sizeof(Element) * 8); 2534 } 2535 fpscr.qc = 1; 2536 } else { 2537 destElem = mid; 2538 } 2539 } else { 2540 destElem = srcElem1; 2541 } 2542 FpscrQc = fpscr; 2543 ''' 2544 twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, 2545 sqshrunCode, hasImm=True) 2546 twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, 2547 sqshrunCode, hasImm=True, hi=True) 2548 twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, 2549 sqshrunCode, hasImm=True, scalar=True) 2550 # SQSUB 2551 sqsubCode = ''' 2552 destElem = srcElem1 - srcElem2; 2553 FPSCR fpscr = (FPSCR) FpscrQc; 2554 bool negDest = (destElem < 0); 2555 bool negSrc1 = (srcElem1 < 0); 2556 bool posSrc2 = (srcElem2 >= 0); 2557 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { 2558 destElem = (Element)1 << (sizeof(Element) * 8 - 
1); 2559 if (negDest) 2560 destElem -= 1; 2561 fpscr.qc = 1; 2562 } 2563 FpscrQc = fpscr; 2564 ''' 2565 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, 2566 sqsubCode) 2567 threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, 2568 sqsubCode) 2569 threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, 2570 sqsubCode, scalar=True) 2571 # SQXTN, SQXTN2 2572 sqxtnCode = ''' 2573 FPSCR fpscr = (FPSCR) FpscrQc; 2574 destElem = srcElem1; 2575 if ((BigElement)destElem != srcElem1) { 2576 fpscr.qc = 1; 2577 destElem = mask(sizeof(Element) * 8 - 1); 2578 if (srcElem1 < 0) 2579 destElem = ~destElem; 2580 } 2581 FpscrQc = fpscr; 2582 ''' 2583 twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, 2584 sqxtnCode) 2585 twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, 2586 sqxtnCode, hi=True) 2587 twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, 2588 sqxtnCode, scalar=True) 2589 # SQXTUN, SQXTUN2 2590 sqxtunCode = ''' 2591 FPSCR fpscr = (FPSCR) FpscrQc; 2592 destElem = srcElem1; 2593 if (srcElem1 < 0 || 2594 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 2595 fpscr.qc = 1; 2596 destElem = mask(sizeof(Element) * 8); 2597 if (srcElem1 < 0) 2598 destElem = ~destElem; 2599 } 2600 FpscrQc = fpscr; 2601 ''' 2602 twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, 2603 sqxtunCode) 2604 twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, 2605 sqxtunCode, hi=True) 2606 twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, 2607 sqxtunCode, scalar=True) 2608 # SRHADD 2609 rhaddCode = ''' 2610 Element carryBit = 2611 (((unsigned)srcElem1 & 0x1) + 2612 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 2613 // Use division instead of a shift to ensure the sign extension works 2614 // right. The compiler will figure out if it can be a shift. Mask the 2615 // inputs so they get truncated correctly. 
2616 destElem = (((srcElem1 & ~(Element)1) / 2) + 2617 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2618 ''' 2619 threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, 2620 rhaddCode) 2621 threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, 2622 rhaddCode) 2623 # SRI 2624 sriCode = ''' 2625 if (imm >= sizeof(Element) * 8) 2626 destElem = destElem; 2627 else 2628 destElem = (srcElem1 >> imm) | 2629 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2630 ''' 2631 twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, 2632 True, hasImm=True) 2633 twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, 2634 True, hasImm=True) 2635 # SRSHL 2636 rshlCode = ''' 2637 int16_t shiftAmt = (int8_t)srcElem2; 2638 if (shiftAmt < 0) { 2639 shiftAmt = -shiftAmt; 2640 Element rBit = 0; 2641 if (shiftAmt <= sizeof(Element) * 8) 2642 rBit = bits(srcElem1, shiftAmt - 1); 2643 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) 2644 rBit = 1; 2645 if (shiftAmt >= sizeof(Element) * 8) { 2646 shiftAmt = sizeof(Element) * 8 - 1; 2647 destElem = 0; 2648 } else { 2649 destElem = (srcElem1 >> shiftAmt); 2650 } 2651 // Make sure the right shift sign extended when it should. 
2652 if (ltz(srcElem1) && !ltz(destElem)) { 2653 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2654 1 - shiftAmt)); 2655 } 2656 destElem += rBit; 2657 } else if (shiftAmt > 0) { 2658 if (shiftAmt >= sizeof(Element) * 8) { 2659 destElem = 0; 2660 } else { 2661 destElem = srcElem1 << shiftAmt; 2662 } 2663 } else { 2664 destElem = srcElem1; 2665 } 2666 ''' 2667 threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, 2668 rshlCode) 2669 threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, 2670 rshlCode) 2671 # SRSHR 2672 rshrCode = ''' 2673 if (imm > sizeof(srcElem1) * 8) { 2674 destElem = 0; 2675 } else if (imm) { 2676 Element rBit = bits(srcElem1, imm - 1); 2677 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2678 } else { 2679 destElem = srcElem1; 2680 } 2681 ''' 2682 twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, 2683 rshrCode, hasImm=True) 2684 twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, 2685 rshrCode, hasImm=True) 2686 # SRSRA 2687 rsraCode = ''' 2688 if (imm > sizeof(srcElem1) * 8) { 2689 destElem += 0; 2690 } else if (imm) { 2691 Element rBit = bits(srcElem1, imm - 1); 2692 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2693 } else { 2694 destElem += srcElem1; 2695 } 2696 ''' 2697 twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, 2698 rsraCode, True, hasImm=True) 2699 twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, 2700 rsraCode, True, hasImm=True) 2701 # SSHL 2702 shlCode = ''' 2703 int16_t shiftAmt = (int8_t)srcElem2; 2704 if (shiftAmt < 0) { 2705 shiftAmt = -shiftAmt; 2706 if (shiftAmt >= sizeof(Element) * 8) { 2707 shiftAmt = sizeof(Element) * 8 - 1; 2708 destElem = 0; 2709 } else { 2710 destElem = (srcElem1 >> shiftAmt); 2711 } 2712 // Make sure the right shift sign extended when it should. 
2713 if (ltz(srcElem1) && !ltz(destElem)) { 2714 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2715 1 - shiftAmt)); 2716 } 2717 } else { 2718 if (shiftAmt >= sizeof(Element) * 8) { 2719 destElem = 0; 2720 } else { 2721 destElem = srcElem1 << shiftAmt; 2722 } 2723 } 2724 ''' 2725 threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, 2726 shlCode) 2727 threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, 2728 shlCode) 2729 # SSHLL, SSHLL2 2730 shllCode = ''' 2731 if (imm >= sizeof(destElem) * 8) { 2732 destElem = 0; 2733 } else { 2734 destElem = (BigElement)srcElem1 << imm; 2735 } 2736 ''' 2737 twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, 2738 shllCode, hasImm=True) 2739 twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, 2740 shllCode, hasImm=True, hi=True) 2741 # SSHR 2742 shrCode = ''' 2743 if (imm >= sizeof(srcElem1) * 8) { 2744 if (ltz(srcElem1)) 2745 destElem = -1; 2746 else 2747 destElem = 0; 2748 } else { 2749 destElem = srcElem1 >> imm; 2750 } 2751 ''' 2752 twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, 2753 hasImm=True) 2754 twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, 2755 hasImm=True) 2756 # SSRA 2757 sraCode = ''' 2758 Element mid;; 2759 if (imm >= sizeof(srcElem1) * 8) { 2760 mid = ltz(srcElem1) ? 
-1 : 0; 2761 } else { 2762 mid = srcElem1 >> imm; 2763 if (ltz(srcElem1) && !ltz(mid)) { 2764 mid |= -(mid & ((Element)1 << 2765 (sizeof(Element) * 8 - 1 - imm))); 2766 } 2767 } 2768 destElem += mid; 2769 ''' 2770 twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, 2771 True, hasImm=True) 2772 twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, 2773 True, hasImm=True) 2774 # SSUBL 2775 sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" 2776 threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, 2777 sublwCode) 2778 threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, 2779 sublwCode, hi=True) 2780 # SSUBW 2781 threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, 2782 sublwCode) 2783 threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, 2784 sublwCode, hi=True) 2785 # SUB 2786 subCode = "destElem = srcElem1 - srcElem2;" 2787 threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) 2788 threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) 2789 # SUBHN, SUBHN2 2790 subhnCode = ''' 2791 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> 2792 (sizeof(Element) * 8); 2793 ''' 2794 threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, 2795 subhnCode) 2796 threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, 2797 subhnCode, hi=True) 2798 # SUQADD 2799 suqaddCode = ''' 2800 FPSCR fpscr = (FPSCR) FpscrQc; 2801 Element tmp = destElem + srcElem1; 2802 if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { 2803 if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || 2804 tmp < srcElem1 || tmp < destElem) { 2805 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2806 fpscr.qc = 1; 2807 } else { 2808 destElem = tmp; 2809 } 2810 } else { 2811 Element absDestElem = (~destElem) + 1; 2812 if (absDestElem < srcElem1) { 2813 // Still check for positive sat., no need to check for 
negative sat. 2814 if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { 2815 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2816 fpscr.qc = 1; 2817 } else { 2818 destElem = tmp; 2819 } 2820 } else { 2821 destElem = tmp; 2822 } 2823 } 2824 FpscrQc = fpscr; 2825 ''' 2826 twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2827 suqaddCode, True) 2828 twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, 2829 suqaddCode, True) 2830 twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, 2831 suqaddCode, True, scalar=True) 2832 # SXTL -> alias to SSHLL 2833 # TBL 2834 tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) 2835 tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) 2836 tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) 2837 tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) 2838 tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) 2839 tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) 2840 tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) 2841 tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) 2842 # TBX 2843 tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) 2844 tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) 2845 tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) 2846 tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) 2847 tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) 2848 tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) 2849 tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) 2850 tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) 2851 # TRN1 2852 trnCode = ''' 2853 unsigned part = %s; 2854 for (unsigned i = 0; i < eCount / 2; i++) { 2855 destReg.elements[2 * i] = 
srcReg1.elements[2 * i + part]; 2856 destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; 2857 } 2858 ''' 2859 threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, 2860 trnCode % "0") 2861 threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, 2862 trnCode % "0") 2863 # TRN2 2864 threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, 2865 trnCode % "1") 2866 threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, 2867 trnCode % "1") 2868 # UABA 2869 threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, 2870 abaCode, True) 2871 threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, 2872 abaCode, True) 2873 # UABAL, UABAL2 2874 threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, 2875 abalCode, True) 2876 threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, 2877 abalCode, True, hi=True) 2878 # UABD 2879 threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, 2880 abdCode) 2881 threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, 2882 abdCode) 2883 # UABDL, UABDL2 2884 threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, 2885 abdlCode, True) 2886 threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, 2887 abdlCode, True, hi=True) 2888 # UADALP 2889 twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, 2890 2, adalpCode, True) 2891 twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, 2892 4, adalpCode, True) 2893 # UADDL, UADDL2 2894 threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, 2895 addlwCode) 2896 threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, 2897 addlwCode, hi=True) 2898 # UADDLP 2899 twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, 2900 2, addlwCode) 2901 twoRegCondenseInstX("uaddlp", "UaddlpQX", 
"SimdAddOp", smallUnsignedTypes, 2902 4, addlwCode) 2903 # UADDLV 2904 twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", 2905 ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) 2906 twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", 2907 ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True) 2908 twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, 2909 addAcrossLongCode, doubleDest=True, long=True) 2910 # UADDW 2911 threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, 2912 addlwCode) 2913 threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, 2914 addlwCode, hi=True) 2915 # UCVTF (fixed-point) 2916 ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," 2917 " FPCRRounding(fpscr), fpscr)") 2918 twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, 2919 ucvtfFixedCode, hasImm=True) 2920 twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, 2921 ucvtfFixedCode, hasImm=True) 2922 twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, 2923 ucvtfFixedCode, hasImm=True, scalar=True) 2924 # UCVTF (integer) 2925 ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," 2926 " FPCRRounding(fpscr), fpscr)") 2927 twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, 2928 ucvtfIntCode) 2929 twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, 2930 ucvtfIntCode) 2931 twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, 2932 ucvtfIntCode, scalar=True) 2933 # UHADD 2934 threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2935 haddCode) 2936 threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, 2937 haddCode) 2938 # UHSUB 2939 threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, 2940 hsubCode) 2941 threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, 2942 hsubCode) 2943 # UMAX 2944 
threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, 2945 maxCode) 2946 threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, 2947 maxCode) 2948 # UMAXP 2949 threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2950 maxCode, pairwise=True) 2951 threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2952 maxCode, pairwise=True) 2953 # UMAXV 2954 twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2955 2, maxAcrossCode) 2956 twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2957 maxAcrossCode) 2958 # UMIN 2959 threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, 2960 minCode) 2961 threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, 2962 minCode) 2963 # UMINP 2964 threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2965 minCode, pairwise=True) 2966 threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2967 minCode, pairwise=True) 2968 # UMINV 2969 twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2970 2, minAcrossCode) 2971 twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2972 minAcrossCode) 2973 # UMLAL (by element) 2974 threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", 2975 smallUnsignedTypes, mlalCode, True, byElem=True) 2976 threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", 2977 smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) 2978 # UMLAL (vector) 2979 threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, 2980 mlalCode, True) 2981 threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, 2982 mlalCode, True, hi=True) 2983 # UMLSL (by element) 2984 threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", 2985 smallUnsignedTypes, mlslCode, True, byElem=True) 2986 threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", 2987 
smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) 2988 # UMLSL (vector) 2989 threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, 2990 mlslCode, True) 2991 threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, 2992 mlslCode, True, hi=True) 2993 # UMOV 2994 insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 2995 insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') 2996 # UMULL, UMULL2 (by element) 2997 threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, 2998 mullCode, byElem=True) 2999 threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, 3000 mullCode, byElem=True, hi=True) 3001 # UMULL, UMULL2 (vector) 3002 threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, 3003 mullCode) 3004 threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, 3005 mullCode, hi=True) 3006 # UQADD 3007 uqaddCode = ''' 3008 destElem = srcElem1 + srcElem2; 3009 FPSCR fpscr = (FPSCR) FpscrQc; 3010 if (destElem < srcElem1 || destElem < srcElem2) { 3011 destElem = (Element)(-1); 3012 fpscr.qc = 1; 3013 } 3014 FpscrQc = fpscr; 3015 ''' 3016 threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3017 uqaddCode) 3018 threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, 3019 uqaddCode) 3020 threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, 3021 uqaddCode, scalar=True) 3022 # UQRSHL 3023 uqrshlCode = ''' 3024 int16_t shiftAmt = (int8_t)srcElem2; 3025 FPSCR fpscr = (FPSCR) FpscrQc; 3026 if (shiftAmt < 0) { 3027 shiftAmt = -shiftAmt; 3028 Element rBit = 0; 3029 if (shiftAmt <= sizeof(Element) * 8) 3030 rBit = bits(srcElem1, shiftAmt - 1); 3031 if (shiftAmt >= sizeof(Element) * 8) { 3032 shiftAmt = sizeof(Element) * 8 - 1; 3033 destElem = 0; 3034 } else { 3035 destElem = (srcElem1 >> shiftAmt); 3036 } 3037 destElem += rBit; 3038 } else { 3039 if (shiftAmt >= sizeof(Element) * 8) { 
3040 if (srcElem1 != 0) { 3041 destElem = mask(sizeof(Element) * 8); 3042 fpscr.qc = 1; 3043 } else { 3044 destElem = 0; 3045 } 3046 } else { 3047 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3048 sizeof(Element) * 8 - shiftAmt)) { 3049 destElem = mask(sizeof(Element) * 8); 3050 fpscr.qc = 1; 3051 } else { 3052 destElem = srcElem1 << shiftAmt; 3053 } 3054 } 3055 } 3056 FpscrQc = fpscr; 3057 ''' 3058 threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, 3059 2, uqrshlCode) 3060 threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, 3061 uqrshlCode) 3062 threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, 3063 uqrshlCode, scalar=True) 3064 # UQRSHRN 3065 uqrshrnCode = ''' 3066 FPSCR fpscr = (FPSCR) FpscrQc; 3067 if (imm > sizeof(srcElem1) * 8) { 3068 if (srcElem1 != 0) 3069 fpscr.qc = 1; 3070 destElem = 0; 3071 } else if (imm) { 3072 BigElement mid = (srcElem1 >> (imm - 1)); 3073 uint64_t rBit = mid & 0x1; 3074 mid >>= 1; 3075 mid += rBit; 3076 if (mid != (Element)mid) { 3077 destElem = mask(sizeof(Element) * 8); 3078 fpscr.qc = 1; 3079 } else { 3080 destElem = mid; 3081 } 3082 } else { 3083 if (srcElem1 != (Element)srcElem1) { 3084 destElem = mask(sizeof(Element) * 8 - 1); 3085 fpscr.qc = 1; 3086 } else { 3087 destElem = srcElem1; 3088 } 3089 } 3090 FpscrQc = fpscr; 3091 ''' 3092 twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, 3093 uqrshrnCode, hasImm=True) 3094 twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", 3095 smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) 3096 twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", 3097 smallUnsignedTypes, uqrshrnCode, hasImm=True, 3098 scalar=True) 3099 # UQSHL (immediate) 3100 uqshlImmCode = ''' 3101 FPSCR fpscr = (FPSCR) FpscrQc; 3102 if (imm >= sizeof(Element) * 8) { 3103 if (srcElem1 != 0) { 3104 destElem = mask(sizeof(Element) * 8); 3105 fpscr.qc = 1; 3106 } else { 3107 destElem = 0; 3108 } 3109 } else if (imm) { 3110 
destElem = (srcElem1 << imm); 3111 uint64_t topBits = bits((uint64_t)srcElem1, 3112 sizeof(Element) * 8 - 1, 3113 sizeof(Element) * 8 - imm); 3114 if (topBits != 0) { 3115 destElem = mask(sizeof(Element) * 8); 3116 fpscr.qc = 1; 3117 } 3118 } else { 3119 destElem = srcElem1; 3120 } 3121 FpscrQc = fpscr; 3122 ''' 3123 twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, 3124 uqshlImmCode, hasImm=True) 3125 twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, 3126 uqshlImmCode, hasImm=True) 3127 twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, 3128 uqshlImmCode, hasImm=True, scalar=True) 3129 # UQSHL (register) 3130 uqshlCode = ''' 3131 int16_t shiftAmt = (int8_t)srcElem2; 3132 FPSCR fpscr = (FPSCR) FpscrQc; 3133 if (shiftAmt < 0) { 3134 shiftAmt = -shiftAmt; 3135 if (shiftAmt >= sizeof(Element) * 8) { 3136 shiftAmt = sizeof(Element) * 8 - 1; 3137 destElem = 0; 3138 } else { 3139 destElem = (srcElem1 >> shiftAmt); 3140 } 3141 } else if (shiftAmt > 0) { 3142 if (shiftAmt >= sizeof(Element) * 8) { 3143 if (srcElem1 != 0) { 3144 destElem = mask(sizeof(Element) * 8); 3145 fpscr.qc = 1; 3146 } else { 3147 destElem = 0; 3148 } 3149 } else { 3150 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3151 sizeof(Element) * 8 - shiftAmt)) { 3152 destElem = mask(sizeof(Element) * 8); 3153 fpscr.qc = 1; 3154 } else { 3155 destElem = srcElem1 << shiftAmt; 3156 } 3157 } 3158 } else { 3159 destElem = srcElem1; 3160 } 3161 FpscrQc = fpscr; 3162 ''' 3163 threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, 3164 uqshlCode) 3165 threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, 3166 uqshlCode) 3167 threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, 3168 uqshlCode, scalar=True) 3169 # UQSHRN, UQSHRN2 3170 uqshrnCode = ''' 3171 FPSCR fpscr = (FPSCR) FpscrQc; 3172 if (imm > sizeof(srcElem1) * 8) { 3173 if (srcElem1 != 0) 3174 fpscr.qc = 1; 3175 destElem = 0; 3176 } else if 
(imm) { 3177 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3178 if (mid != (Element)mid) { 3179 destElem = mask(sizeof(Element) * 8); 3180 fpscr.qc = 1; 3181 } else { 3182 destElem = mid; 3183 } 3184 } else { 3185 destElem = srcElem1; 3186 } 3187 FpscrQc = fpscr; 3188 ''' 3189 twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, 3190 uqshrnCode, hasImm=True) 3191 twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, 3192 uqshrnCode, hasImm=True, hi=True) 3193 twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, 3194 uqshrnCode, hasImm=True, scalar=True) 3195 # UQSUB 3196 uqsubCode = ''' 3197 destElem = srcElem1 - srcElem2; 3198 FPSCR fpscr = (FPSCR) FpscrQc; 3199 if (destElem > srcElem1) { 3200 destElem = 0; 3201 fpscr.qc = 1; 3202 } 3203 FpscrQc = fpscr; 3204 ''' 3205 threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, 3206 uqsubCode) 3207 threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, 3208 uqsubCode) 3209 threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, 3210 uqsubCode, scalar=True) 3211 # UQXTN 3212 uqxtnCode = ''' 3213 FPSCR fpscr = (FPSCR) FpscrQc; 3214 destElem = srcElem1; 3215 if ((BigElement)destElem != srcElem1) { 3216 fpscr.qc = 1; 3217 destElem = mask(sizeof(Element) * 8); 3218 } 3219 FpscrQc = fpscr; 3220 ''' 3221 twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, 3222 uqxtnCode) 3223 twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, 3224 uqxtnCode, hi=True) 3225 twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, 3226 uqxtnCode, scalar=True) 3227 # URECPE 3228 urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" 3229 twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, 3230 urecpeCode) 3231 twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, 3232 urecpeCode) 3233 # URHADD 3234 threeEqualRegInstX("urhadd", 
"UrhaddDX", "SimdAddOp", smallUnsignedTypes, 3235 2, rhaddCode) 3236 threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, 3237 4, rhaddCode) 3238 # URSHL 3239 threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, 3240 rshlCode) 3241 threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, 3242 rshlCode) 3243 # URSHR 3244 twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, 3245 rshrCode, hasImm=True) 3246 twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, 3247 rshrCode, hasImm=True) 3248 # URSQRTE 3249 ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" 3250 twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, 3251 ursqrteCode) 3252 twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, 3253 ursqrteCode) 3254 # URSRA 3255 twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, 3256 rsraCode, True, hasImm=True) 3257 twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, 3258 rsraCode, True, hasImm=True) 3259 # USHL 3260 threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, 3261 shlCode) 3262 threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, 3263 shlCode) 3264 # USHLL, USHLL2 3265 twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, 3266 shllCode, hasImm=True) 3267 twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, 3268 shllCode, hi=True, hasImm=True) 3269 # USHR 3270 twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, 3271 shrCode, hasImm=True) 3272 twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, 3273 shrCode, hasImm=True) 3274 # USQADD 3275 usqaddCode = ''' 3276 FPSCR fpscr = (FPSCR) FpscrQc; 3277 Element tmp = destElem + srcElem1; 3278 if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { 3279 if (tmp < srcElem1 || tmp < destElem) { 3280 destElem = (Element)(-1); 3281 fpscr.qc = 1; 3282 } else { 
3283 destElem = tmp; 3284 } 3285 } else { 3286 Element absSrcElem1 = (~srcElem1) + 1; 3287 if (absSrcElem1 > destElem) { 3288 destElem = 0; 3289 fpscr.qc = 1; 3290 } else { 3291 destElem = tmp; 3292 } 3293 } 3294 FpscrQc = fpscr; 3295 ''' 3296 twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3297 usqaddCode, True) 3298 twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, 3299 usqaddCode, True) 3300 twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, 3301 usqaddCode, True, scalar=True) 3302 # USRA 3303 twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, 3304 sraCode, True, hasImm=True) 3305 twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, 3306 sraCode, True, hasImm=True) 3307 # USUBL 3308 threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, 3309 sublwCode) 3310 threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, 3311 sublwCode, hi=True) 3312 # USUBW 3313 threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, 3314 sublwCode) 3315 threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, 3316 sublwCode, hi=True) 3317 # UXTL -> alias to USHLL 3318 # UZP1 3319 uzpCode = ''' 3320 unsigned part = %s; 3321 for (unsigned i = 0; i < eCount / 2; i++) { 3322 destReg.elements[i] = srcReg1.elements[2 * i + part]; 3323 destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; 3324 } 3325 ''' 3326 threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, 3327 uzpCode % "0") 3328 threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, 3329 uzpCode % "0") 3330 # UZP2 3331 threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, 3332 uzpCode % "1") 3333 threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, 3334 uzpCode % "1") 3335 # XTN, XTN2 3336 xtnCode = "destElem = srcElem1;" 3337 twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", 
smallUnsignedTypes, xtnCode) 3338 twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, 3339 xtnCode, hi=True) 3340 # ZIP1 3341 zipCode = ''' 3342 unsigned base = %s; 3343 for (unsigned i = 0; i < eCount / 2; i++) { 3344 destReg.elements[2 * i] = srcReg1.elements[base + i]; 3345 destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; 3346 } 3347 ''' 3348 threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, 3349 zipCode % "0") 3350 threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, 3351 zipCode % "0") 3352 # ZIP2 3353 threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, 3354 zipCode % "eCount / 2") 3355 threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, 3356 zipCode % "eCount / 2") 3357 3358}}; 3359