1// -*- mode: c++ -*- 2 3// Copyright (c) 2012-2013, 2015-2018 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating 9// to a hardware implementation of the functionality of the software 10// licensed hereunder. You may use the software subject to the license 11// terms below provided that you ensure that this notice is replicated 12// unmodified and in its entirety in all distributions of the software, 13// modified or unmodified, in source code or in binary form. 14// 15// Redistribution and use in source and binary forms, with or without 16// modification, are permitted provided that the following conditions are 17// met: redistributions of source code must retain the above copyright 18// notice, this list of conditions and the following disclaimer; 19// redistributions in binary form must reproduce the above copyright 20// notice, this list of conditions and the following disclaimer in the 21// documentation and/or other materials provided with the distribution; 22// neither the name of the copyright holders nor the names of its 23// contributors may be used to endorse or promote products derived from 24// this software without specific prior written permission. 25// 26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Giacomo Gabrielli 39// Mbou Eyole 40 41let {{ 42 43 header_output = "" 44 exec_output = "" 45 decoders = { 'Generic' : {} } 46 47 # FP types (FP operations always work with unsigned representations) 48 floatTypes = ("uint16_t", "uint32_t", "uint64_t") 49 smallFloatTypes = ("uint32_t",) 50 51 zeroSveVecRegUpperPartCode = ''' 52 TheISA::ISA::zeroSveVecRegUpperPart(%s, 53 ArmStaticInst::getCurSveVecLen<uint64_t>(xc->tcBase())); 54 ''' 55 56 def threeEqualRegInstX(name, Name, opClass, types, rCount, op, 57 readDest=False, pairwise=False, scalar=False, 58 byElem=False, decoder='Generic'): 59 assert (not pairwise) or ((not byElem) and (not scalar)) 60 global header_output, exec_output, decoders 61 eWalkCode = simd64EnabledCheckCode + ''' 62 RegVect srcReg1, destReg; 63 ''' 64 if byElem: 65 # 2nd register operand has to be read fully 66 eWalkCode += ''' 67 FullRegVect srcReg2; 68 ''' 69 else: 70 eWalkCode += ''' 71 RegVect srcReg2; 72 ''' 73 for reg in range(rCount): 74 eWalkCode += ''' 75 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 76 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 77 ''' % { "reg" : reg } 78 if readDest: 79 eWalkCode += ''' 80 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 81 ''' % { "reg" : reg } 82 if byElem: 83 # 2nd operand has to be read fully 84 for reg in range(rCount, 4): 85 eWalkCode += ''' 86 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 87 ''' % { "reg" : reg } 
88 readDestCode = '' 89 if readDest: 90 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 91 if pairwise: 92 eWalkCode += ''' 93 for (unsigned i = 0; i < eCount; i++) { 94 Element srcElem1 = gtoh(2 * i < eCount ? 95 srcReg1.elements[2 * i] : 96 srcReg2.elements[2 * i - eCount]); 97 Element srcElem2 = gtoh(2 * i < eCount ? 98 srcReg1.elements[2 * i + 1] : 99 srcReg2.elements[2 * i + 1 - eCount]); 100 Element destElem; 101 %(readDest)s 102 %(op)s 103 destReg.elements[i] = htog(destElem); 104 } 105 ''' % { "op" : op, "readDest" : readDestCode } 106 else: 107 scalarCheck = ''' 108 if (i != 0) { 109 destReg.elements[i] = 0; 110 continue; 111 } 112 ''' 113 eWalkCode += ''' 114 for (unsigned i = 0; i < eCount; i++) { 115 %(scalarCheck)s 116 Element srcElem1 = gtoh(srcReg1.elements[i]); 117 Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 118 Element destElem; 119 %(readDest)s 120 %(op)s 121 destReg.elements[i] = htog(destElem); 122 } 123 ''' % { "op" : op, "readDest" : readDestCode, 124 "scalarCheck" : scalarCheck if scalar else "", 125 "src2Index" : "imm" if byElem else "i" } 126 for reg in range(rCount): 127 eWalkCode += ''' 128 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 129 ''' % { "reg" : reg } 130 if rCount < 4: # zero upper half 131 for reg in range(rCount, 4): 132 eWalkCode += ''' 133 AA64FpDestP%(reg)d_uw = 0; 134 ''' % { "reg" : reg } 135 iop = InstObjParams(name, Name, 136 "DataX2RegImmOp" if byElem else "DataX2RegOp", 137 { "code": eWalkCode, 138 "r_count": rCount, 139 "op_class": opClass }, []) 140 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 141 if byElem: 142 header_output += NeonX2RegImmOpDeclare.subst(iop) 143 else: 144 header_output += NeonX2RegOpDeclare.subst(iop) 145 exec_output += NeonXEqualRegOpExecute.subst(iop) 146 for type in types: 147 substDict = { "targs" : type, 148 "class_name" : Name } 149 exec_output += NeonXExecDeclare.subst(substDict) 150 151 def threeUnequalRegInstX(name, Name, opClass, types, 
op, 152 bigSrc1, bigSrc2, bigDest, readDest, scalar=False, 153 byElem=False, hi=False): 154 assert not (scalar and hi) 155 global header_output, exec_output 156 src1Cnt = src2Cnt = destCnt = 2 157 src1Prefix = src2Prefix = destPrefix = '' 158 if bigSrc1: 159 src1Cnt = 4 160 src1Prefix = 'Big' 161 if bigSrc2: 162 src2Cnt = 4 163 src2Prefix = 'Big' 164 if bigDest: 165 destCnt = 4 166 destPrefix = 'Big' 167 if byElem: 168 src2Prefix = 'Full' 169 eWalkCode = simd64EnabledCheckCode + ''' 170 %sRegVect srcReg1; 171 %sRegVect srcReg2; 172 %sRegVect destReg; 173 ''' % (src1Prefix, src2Prefix, destPrefix) 174 srcReg1 = 0 175 if hi and not bigSrc1: # long/widening operations 176 srcReg1 = 2 177 for reg in range(src1Cnt): 178 eWalkCode += ''' 179 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); 180 ''' % { "reg" : reg, "srcReg1" : srcReg1 } 181 srcReg1 += 1 182 srcReg2 = 0 183 if (not byElem) and (hi and not bigSrc2): # long/widening operations 184 srcReg2 = 2 185 for reg in range(src2Cnt): 186 eWalkCode += ''' 187 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); 188 ''' % { "reg" : reg, "srcReg2" : srcReg2 } 189 srcReg2 += 1 190 if byElem: 191 # 2nd operand has to be read fully 192 for reg in range(src2Cnt, 4): 193 eWalkCode += ''' 194 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 195 ''' % { "reg" : reg } 196 if readDest: 197 for reg in range(destCnt): 198 eWalkCode += ''' 199 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 200 ''' % { "reg" : reg } 201 readDestCode = '' 202 if readDest: 203 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 204 scalarCheck = ''' 205 if (i != 0) { 206 destReg.elements[i] = 0; 207 continue; 208 } 209 ''' 210 eWalkCode += ''' 211 for (unsigned i = 0; i < eCount; i++) { 212 %(scalarCheck)s 213 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 214 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 215 %(destPrefix)sElement destElem; 216 %(readDest)s 217 %(op)s 218 destReg.elements[i] = 
htog(destElem); 219 } 220 ''' % { "op" : op, "readDest" : readDestCode, 221 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 222 "destPrefix" : destPrefix, 223 "scalarCheck" : scalarCheck if scalar else "", 224 "src2Index" : "imm" if byElem else "i" } 225 destReg = 0 226 if hi and not bigDest: 227 # narrowing operations 228 destReg = 2 229 for reg in range(destCnt): 230 eWalkCode += ''' 231 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 232 ''' % { "reg" : reg, "destReg": destReg } 233 destReg += 1 234 if destCnt < 4: 235 if hi: # Explicitly merge with lower half 236 for reg in range(0, destCnt): 237 eWalkCode += ''' 238 AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } 239 else: # zero upper half 240 for reg in range(destCnt, 4): 241 eWalkCode += ''' 242 AA64FpDestP%(reg)d_uw = 0;''' % { "reg" : reg } 243 244 iop = InstObjParams(name, Name, 245 "DataX2RegImmOp" if byElem else "DataX2RegOp", 246 { "code": eWalkCode, 247 "r_count": 2, 248 "op_class": opClass }, []) 249 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 250 if byElem: 251 header_output += NeonX2RegImmOpDeclare.subst(iop) 252 else: 253 header_output += NeonX2RegOpDeclare.subst(iop) 254 exec_output += NeonXUnequalRegOpExecute.subst(iop) 255 for type in types: 256 substDict = { "targs" : type, 257 "class_name" : Name } 258 exec_output += NeonXExecDeclare.subst(substDict) 259 260 def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 261 scalar=False, byElem=False, hi=False): 262 assert not byElem 263 threeUnequalRegInstX(name, Name, opClass, types, op, 264 True, True, False, readDest, scalar, byElem, hi) 265 266 def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, 267 scalar=False, byElem=False, hi=False): 268 threeUnequalRegInstX(name, Name, opClass, types, op, 269 False, False, True, readDest, scalar, byElem, hi) 270 271 def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, 272 scalar=False, byElem=False, 
hi=False): 273 assert not byElem 274 threeUnequalRegInstX(name, Name, opClass, types, op, 275 True, False, True, readDest, scalar, byElem, hi) 276 277 def twoEqualRegInstX(name, Name, opClass, types, rCount, op, 278 readDest=False, scalar=False, byElem=False, 279 hasImm=False, isDup=False): 280 global header_output, exec_output 281 assert (not isDup) or byElem 282 if byElem: 283 hasImm = True 284 if isDup: 285 eWalkCode = simd64EnabledCheckCode + ''' 286 FullRegVect srcReg1; 287 RegVect destReg; 288 ''' 289 else: 290 eWalkCode = simd64EnabledCheckCode + ''' 291 RegVect srcReg1, destReg; 292 ''' 293 for reg in range(4 if isDup else rCount): 294 eWalkCode += ''' 295 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 296 ''' % { "reg" : reg } 297 if readDest: 298 eWalkCode += ''' 299 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 300 ''' % { "reg" : reg } 301 readDestCode = '' 302 if readDest: 303 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 304 scalarCheck = ''' 305 if (i != 0) { 306 destReg.elements[i] = 0; 307 continue; 308 } 309 ''' 310 eWalkCode += ''' 311 for (unsigned i = 0; i < eCount; i++) { 312 %(scalarCheck)s 313 unsigned j = i; 314 Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); 315 Element destElem; 316 %(readDest)s 317 %(op)s 318 destReg.elements[j] = htog(destElem); 319 } 320 ''' % { "op" : op, "readDest" : readDestCode, 321 "scalarCheck" : scalarCheck if scalar else "", 322 "src1Index" : "imm" if byElem else "i" } 323 for reg in range(rCount): 324 eWalkCode += ''' 325 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 326 ''' % { "reg" : reg } 327 if rCount < 4: # zero upper half 328 for reg in range(rCount, 4): 329 eWalkCode += ''' 330 AA64FpDestP%(reg)d_uw = 0; 331 ''' % { "reg" : reg } 332 iop = InstObjParams(name, Name, 333 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 334 { "code": eWalkCode, 335 "r_count": rCount, 336 "op_class": opClass }, []) 337 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 338 
if hasImm: 339 header_output += NeonX1RegImmOpDeclare.subst(iop) 340 else: 341 header_output += NeonX1RegOpDeclare.subst(iop) 342 exec_output += NeonXEqualRegOpExecute.subst(iop) 343 for type in types: 344 substDict = { "targs" : type, 345 "class_name" : Name } 346 exec_output += NeonXExecDeclare.subst(substDict) 347 348 def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, 349 hi=False, hasImm=False): 350 global header_output, exec_output 351 eWalkCode = simd64EnabledCheckCode + ''' 352 RegVect srcReg1; 353 BigRegVect destReg; 354 ''' 355 destReg = 0 if not hi else 2 356 for reg in range(2): 357 eWalkCode += ''' 358 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); 359 ''' % { "reg" : reg, "destReg": destReg } 360 destReg += 1 361 destReg = 0 if not hi else 2 362 if readDest: 363 for reg in range(4): 364 eWalkCode += ''' 365 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 366 ''' % { "reg" : reg } 367 destReg += 1 368 readDestCode = '' 369 if readDest: 370 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 371 eWalkCode += ''' 372 for (unsigned i = 0; i < eCount; i++) { 373 Element srcElem1 = gtoh(srcReg1.elements[i]); 374 BigElement destElem; 375 %(readDest)s 376 %(op)s 377 destReg.elements[i] = htog(destElem); 378 } 379 ''' % { "op" : op, "readDest" : readDestCode } 380 for reg in range(4): 381 eWalkCode += ''' 382 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 383 ''' % { "reg" : reg } 384 iop = InstObjParams(name, Name, 385 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 386 { "code": eWalkCode, 387 "r_count": 2, 388 "op_class": opClass }, []) 389 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 390 if hasImm: 391 header_output += NeonX1RegImmOpDeclare.subst(iop) 392 else: 393 header_output += NeonX1RegOpDeclare.subst(iop) 394 exec_output += NeonXUnequalRegOpExecute.subst(iop) 395 for type in types: 396 substDict = { "targs" : type, 397 "class_name" : Name } 398 exec_output += NeonXExecDeclare.subst(substDict) 
399 400 def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 401 scalar=False, hi=False, hasImm=False): 402 global header_output, exec_output 403 eWalkCode = simd64EnabledCheckCode + ''' 404 BigRegVect srcReg1; 405 RegVect destReg; 406 ''' 407 for reg in range(4): 408 eWalkCode += ''' 409 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 410 ''' % { "reg" : reg } 411 if readDest: 412 for reg in range(2): 413 eWalkCode += ''' 414 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 415 ''' % { "reg" : reg } 416 else: 417 eWalkCode += ''' 418 destReg.elements[0] = 0; 419 ''' % { "reg" : reg } 420 readDestCode = '' 421 if readDest: 422 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 423 scalarCheck = ''' 424 if (i != 0) { 425 destReg.elements[i] = 0; 426 continue; 427 } 428 ''' 429 eWalkCode += ''' 430 for (unsigned i = 0; i < eCount; i++) { 431 %(scalarCheck)s 432 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 433 Element destElem; 434 %(readDest)s 435 %(op)s 436 destReg.elements[i] = htog(destElem); 437 } 438 ''' % { "op" : op, "readDest" : readDestCode, 439 "scalarCheck" : scalarCheck if scalar else "" } 440 destReg = 0 if not hi else 2 441 for reg in range(2): 442 eWalkCode += ''' 443 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 444 ''' % { "reg" : reg, "destReg": destReg } 445 destReg += 1 446 if hi: 447 for reg in range(0, 2): # Explicitly merge with the lower half 448 eWalkCode += ''' 449 AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } 450 else: 451 for reg in range(2, 4): # zero upper half 452 eWalkCode += ''' 453 AA64FpDestP%(reg)d_uw = 0; 454 ''' % { "reg" : reg } 455 456 iop = InstObjParams(name, Name, 457 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 458 { "code": eWalkCode, 459 "r_count": 2, 460 "op_class": opClass }, []) 461 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 462 if hasImm: 463 header_output += NeonX1RegImmOpDeclare.subst(iop) 464 else: 465 header_output += 
NeonX1RegOpDeclare.subst(iop) 466 exec_output += NeonXUnequalRegOpExecute.subst(iop) 467 for type in types: 468 substDict = { "targs" : type, 469 "class_name" : Name } 470 exec_output += NeonXExecDeclare.subst(substDict) 471 472 def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): 473 global header_output, exec_output 474 eWalkCode = simd64EnabledCheckCode + ''' 475 RegVect srcReg1, srcReg2, destReg; 476 ''' 477 for reg in range(rCount): 478 eWalkCode += ''' 479 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 480 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 481 ''' % { "reg" : reg } 482 eWalkCode += op 483 for reg in range(rCount): 484 eWalkCode += ''' 485 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 486 ''' % { "reg" : reg } 487 if rCount < 4: 488 for reg in range(rCount, 4): 489 eWalkCode += ''' 490 AA64FpDestP%(reg)d_uw = 0; 491 ''' % { "reg" : reg } 492 iop = InstObjParams(name, Name, 493 "DataX2RegOp", 494 { "code": eWalkCode, 495 "r_count": rCount, 496 "op_class": opClass }, []) 497 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 498 header_output += NeonX2RegOpDeclare.subst(iop) 499 exec_output += NeonXEqualRegOpExecute.subst(iop) 500 for type in types: 501 substDict = { "targs" : type, 502 "class_name" : Name } 503 exec_output += NeonXExecDeclare.subst(substDict) 504 505 def insFromVecElemInstX(name, Name, opClass, types, rCount): 506 global header_output, exec_output 507 eWalkCode = simd64EnabledCheckCode + ''' 508 FullRegVect srcReg1; 509 RegVect destReg; 510 ''' 511 for reg in range(4): 512 eWalkCode += ''' 513 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 514 ''' % { "reg" : reg } 515 for reg in range(rCount): 516 eWalkCode += ''' 517 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 518 ''' % { "reg" : reg } 519 eWalkCode += ''' 520 Element srcElem1 = gtoh(srcReg1.elements[imm2]); 521 Element destElem = srcElem1; 522 destReg.elements[imm1] = htog(destElem); 523 ''' 524 for reg in 
range(rCount): 525 eWalkCode += ''' 526 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 527 ''' % { "reg" : reg } 528 iop = InstObjParams(name, Name, 529 "DataX1Reg2ImmOp", 530 { "code": eWalkCode, 531 "r_count": rCount, 532 "op_class": opClass }, []) 533 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 534 header_output += NeonX1Reg2ImmOpDeclare.subst(iop) 535 exec_output += NeonXEqualRegOpExecute.subst(iop) 536 for type in types: 537 substDict = { "targs" : type, 538 "class_name" : Name } 539 exec_output += NeonXExecDeclare.subst(substDict) 540 541 def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): 542 global header_output, exec_output 543 eWalkCode = simd64EnabledCheckCode + ''' 544 RegVect srcReg1, destReg; 545 ''' 546 for reg in range(rCount): 547 eWalkCode += ''' 548 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 549 ''' % { "reg" : reg } 550 eWalkCode += ''' 551 Element srcElem1 = gtoh(srcReg1.elements[0]); 552 Element srcElem2 = gtoh(srcReg1.elements[1]); 553 Element destElem; 554 %(op)s 555 destReg.elements[0] = htog(destElem); 556 ''' % { "op" : op } 557 destCnt = rCount / 2 558 for reg in range(destCnt): 559 eWalkCode += ''' 560 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 561 ''' % { "reg" : reg } 562 for reg in range(destCnt, 4): # zero upper half 563 eWalkCode += ''' 564 AA64FpDestP%(reg)d_uw = 0; 565 ''' % { "reg" : reg } 566 iop = InstObjParams(name, Name, 567 "DataX1RegOp", 568 { "code": eWalkCode, 569 "r_count": rCount, 570 "op_class": opClass }, []) 571 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 572 header_output += NeonX1RegOpDeclare.subst(iop) 573 exec_output += NeonXEqualRegOpExecute.subst(iop) 574 for type in types: 575 substDict = { "targs" : type, 576 "class_name" : Name } 577 exec_output += NeonXExecDeclare.subst(substDict) 578 579 def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, 580 doubleDest=False, long=False): 581 global header_output, exec_output 
582 destPrefix = "Big" if long else "" 583 eWalkCode = simd64EnabledCheckCode + ''' 584 RegVect srcReg1; 585 %sRegVect destReg; 586 ''' % destPrefix 587 for reg in range(rCount): 588 eWalkCode += ''' 589 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 590 ''' % { "reg" : reg } 591 eWalkCode += ''' 592 destReg.regs[0] = 0; 593 %(destPrefix)sElement destElem = 0; 594 for (unsigned i = 0; i < eCount; i++) { 595 Element srcElem1 = gtoh(srcReg1.elements[i]); 596 if (i == 0) { 597 destElem = srcElem1; 598 } else { 599 %(op)s 600 } 601 } 602 destReg.elements[0] = htog(destElem); 603 ''' % { "op" : op, "destPrefix" : destPrefix } 604 destCnt = 2 if doubleDest else 1 605 for reg in range(destCnt): 606 eWalkCode += ''' 607 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 608 ''' % { "reg" : reg } 609 for reg in range(destCnt, 4): # zero upper half 610 eWalkCode += ''' 611 AA64FpDestP%(reg)d_uw = 0; 612 ''' % { "reg" : reg } 613 iop = InstObjParams(name, Name, 614 "DataX1RegOp", 615 { "code": eWalkCode, 616 "r_count": rCount, 617 "op_class": opClass }, []) 618 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 619 header_output += NeonX1RegOpDeclare.subst(iop) 620 if long: 621 exec_output += NeonXUnequalRegOpExecute.subst(iop) 622 else: 623 exec_output += NeonXEqualRegOpExecute.subst(iop) 624 for type in types: 625 substDict = { "targs" : type, 626 "class_name" : Name } 627 exec_output += NeonXExecDeclare.subst(substDict) 628 629 def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, 630 readDest=False): 631 global header_output, exec_output 632 eWalkCode = simd64EnabledCheckCode + ''' 633 RegVect srcRegs; 634 BigRegVect destReg; 635 ''' 636 for reg in range(rCount): 637 eWalkCode += ''' 638 srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 639 ''' % { "reg" : reg } 640 if readDest: 641 eWalkCode += ''' 642 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 643 ''' % { "reg" : reg } 644 readDestCode = '' 645 if readDest: 646 readDestCode = 
'destElem = gtoh(destReg.elements[i]);' 647 eWalkCode += ''' 648 for (unsigned i = 0; i < eCount / 2; i++) { 649 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 650 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 651 BigElement destElem; 652 %(readDest)s 653 %(op)s 654 destReg.elements[i] = htog(destElem); 655 } 656 ''' % { "op" : op, "readDest" : readDestCode } 657 for reg in range(rCount): 658 eWalkCode += ''' 659 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 660 ''' % { "reg" : reg } 661 if rCount < 4: # zero upper half 662 for reg in range(rCount, 4): 663 eWalkCode += ''' 664 AA64FpDestP%(reg)d_uw = 0; 665 ''' % { "reg" : reg } 666 iop = InstObjParams(name, Name, 667 "DataX1RegOp", 668 { "code": eWalkCode, 669 "r_count": rCount, 670 "op_class": opClass }, []) 671 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 672 header_output += NeonX1RegOpDeclare.subst(iop) 673 exec_output += NeonXUnequalRegOpExecute.subst(iop) 674 for type in types: 675 substDict = { "targs" : type, 676 "class_name" : Name } 677 exec_output += NeonXExecDeclare.subst(substDict) 678 679 def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): 680 global header_output, exec_output 681 eWalkCode = simd64EnabledCheckCode + ''' 682 RegVect destReg; 683 ''' 684 if readDest: 685 for reg in range(rCount): 686 eWalkCode += ''' 687 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 688 ''' % { "reg" : reg } 689 readDestCode = '' 690 if readDest: 691 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 692 eWalkCode += ''' 693 for (unsigned i = 0; i < eCount; i++) { 694 Element destElem; 695 %(readDest)s 696 %(op)s 697 destReg.elements[i] = htog(destElem); 698 } 699 ''' % { "op" : op, "readDest" : readDestCode } 700 for reg in range(rCount): 701 eWalkCode += ''' 702 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 703 ''' % { "reg" : reg } 704 if rCount < 4: # zero upper half 705 for reg in range(rCount, 4): 706 eWalkCode += ''' 707 
AA64FpDestP%(reg)d_uw = 0; 708 ''' % { "reg" : reg } 709 iop = InstObjParams(name, Name, 710 "DataXImmOnlyOp", 711 { "code": eWalkCode, 712 "r_count": rCount, 713 "op_class": opClass }, []) 714 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 715 header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) 716 exec_output += NeonXEqualRegOpExecute.subst(iop) 717 for type in types: 718 substDict = { "targs" : type, 719 "class_name" : Name } 720 exec_output += NeonXExecDeclare.subst(substDict) 721 722 def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): 723 global header_output, exec_output 724 eWalkCode = simd64EnabledCheckCode + ''' 725 RegVect destReg; 726 for (unsigned i = 0; i < eCount; i++) { 727 destReg.elements[i] = htog((Element) %sOp1); 728 } 729 ''' % gprSpec 730 for reg in range(rCount): 731 eWalkCode += ''' 732 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 733 ''' % { "reg" : reg } 734 if rCount < 4: # zero upper half 735 for reg in range(rCount, 4): 736 eWalkCode += ''' 737 AA64FpDestP%(reg)d_uw = 0; 738 ''' % { "reg" : reg } 739 iop = InstObjParams(name, Name, 740 "DataX1RegOp", 741 { "code": eWalkCode, 742 "r_count": rCount, 743 "op_class": opClass }, []) 744 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 745 header_output += NeonX1RegOpDeclare.subst(iop) 746 exec_output += NeonXEqualRegOpExecute.subst(iop) 747 for type in types: 748 substDict = { "targs" : type, 749 "class_name" : Name } 750 exec_output += NeonXExecDeclare.subst(substDict) 751 752 def extInstX(name, Name, opClass, types, rCount, op): 753 global header_output, exec_output 754 eWalkCode = simd64EnabledCheckCode + ''' 755 RegVect srcReg1, srcReg2, destReg; 756 ''' 757 for reg in range(rCount): 758 eWalkCode += ''' 759 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 760 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 761 ''' % { "reg" : reg } 762 eWalkCode += op 763 for reg in range(rCount): 764 eWalkCode += ''' 765 
AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 766 ''' % { "reg" : reg } 767 if rCount < 4: # zero upper half 768 for reg in range(rCount, 4): 769 eWalkCode += ''' 770 AA64FpDestP%(reg)d_uw = 0; 771 ''' % { "reg" : reg } 772 iop = InstObjParams(name, Name, 773 "DataX2RegImmOp", 774 { "code": eWalkCode, 775 "r_count": rCount, 776 "op_class": opClass }, []) 777 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 778 header_output += NeonX2RegImmOpDeclare.subst(iop) 779 exec_output += NeonXEqualRegOpExecute.subst(iop) 780 for type in types: 781 substDict = { "targs" : type, 782 "class_name" : Name } 783 exec_output += NeonXExecDeclare.subst(substDict) 784 785 def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): 786 global header_output, exec_output 787 eWalkCode = simd64EnabledCheckCode + ''' 788 RegVect destReg; 789 ''' 790 for reg in range(rCount): 791 eWalkCode += ''' 792 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 793 ''' % { "reg" : reg } 794 eWalkCode += ''' 795 destReg.elements[imm] = htog((Element) %sOp1); 796 ''' % gprSpec 797 for reg in range(rCount): 798 eWalkCode += ''' 799 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 800 ''' % { "reg" : reg } 801 iop = InstObjParams(name, Name, 802 "DataX1RegImmOp", 803 { "code": eWalkCode, 804 "r_count": rCount, 805 "op_class": opClass }, []) 806 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 807 header_output += NeonX1RegImmOpDeclare.subst(iop) 808 exec_output += NeonXEqualRegOpExecute.subst(iop) 809 for type in types: 810 substDict = { "targs" : type, 811 "class_name" : Name } 812 exec_output += NeonXExecDeclare.subst(substDict) 813 814 def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, 815 signExt=False): 816 global header_output, exec_output 817 eWalkCode = simd64EnabledCheckCode + ''' 818 FullRegVect srcReg; 819 ''' 820 for reg in range(4): 821 eWalkCode += ''' 822 srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 823 ''' % { "reg" : 
reg } 824 if signExt: 825 eWalkCode += ''' 826 %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); 827 ''' % gprSpec 828 else: 829 eWalkCode += ''' 830 %sDest = srcReg.elements[imm]; 831 ''' % gprSpec 832 iop = InstObjParams(name, Name, 833 "DataX1RegImmOp", 834 { "code": eWalkCode, 835 "r_count": rCount, 836 "op_class": opClass }, []) 837 header_output += NeonX1RegImmOpDeclare.subst(iop) 838 exec_output += NeonXEqualRegOpExecute.subst(iop) 839 for type in types: 840 substDict = { "targs" : type, 841 "class_name" : Name } 842 exec_output += NeonXExecDeclare.subst(substDict) 843 844 def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): 845 global header_output, decoder_output, exec_output 846 code = simd64EnabledCheckCode + ''' 847 union 848 { 849 uint8_t bytes[64]; 850 uint32_t regs[16]; 851 } table; 852 853 union 854 { 855 uint8_t bytes[%(rCount)d * 4]; 856 uint32_t regs[%(rCount)d]; 857 } destReg, srcReg2; 858 859 const unsigned length = %(length)d; 860 const bool isTbl = %(isTbl)s; 861 ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } 862 for reg in range(rCount): 863 code += ''' 864 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 865 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 866 ''' % { "reg" : reg } 867 for reg in range(16): 868 if reg < length * 4: 869 code += ''' 870 table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); 871 ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } 872 else: 873 code += ''' 874 table.regs[%(reg)d] = 0; 875 ''' % { "reg" : reg } 876 code += ''' 877 for (unsigned i = 0; i < sizeof(destReg); i++) { 878 uint8_t index = srcReg2.bytes[i]; 879 if (index < 16 * length) { 880 destReg.bytes[i] = table.bytes[index]; 881 } else { 882 if (isTbl) 883 destReg.bytes[i] = 0; 884 // else destReg.bytes[i] unchanged 885 } 886 } 887 ''' 888 for reg in range(rCount): 889 code += ''' 890 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 891 ''' % { "reg" : reg } 892 if rCount < 4: # zero upper half 893 
for reg in range(rCount, 4): 894 code += ''' 895 AA64FpDestP%(reg)d_uw = 0; 896 ''' % { "reg" : reg } 897 iop = InstObjParams(name, Name, 898 "DataX2RegOp", 899 { "code": code, 900 "r_count": rCount, 901 "op_class": opClass }, []) 902 iop.snippets["code"] += zeroSveVecRegUpperPartCode % "AA64FpDest" 903 header_output += NeonX2RegOpDeclare.subst(iop) 904 exec_output += NeonXEqualRegOpExecute.subst(iop) 905 for type in types: 906 substDict = { "targs" : type, 907 "class_name" : Name } 908 exec_output += NeonXExecDeclare.subst(substDict) 909 910 # ABS 911 absCode = ''' 912 if (srcElem1 < 0) { 913 destElem = -srcElem1; 914 } else { 915 destElem = srcElem1; 916 } 917 ''' 918 twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) 919 twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) 920 # ADD 921 addCode = "destElem = srcElem1 + srcElem2;" 922 threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) 923 threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) 924 # ADDHN, ADDHN2 925 addhnCode = ''' 926 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 927 (sizeof(Element) * 8); 928 ''' 929 threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, 930 addhnCode) 931 threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, 932 addhnCode, hi=True) 933 # ADDP (scalar) 934 twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, 935 addCode) 936 # ADDP (vector) 937 threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, 938 addCode, pairwise=True) 939 threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, 940 addCode, pairwise=True) 941 # ADDV 942 # Note: SimdAddOp can be a bit optimistic here 943 addAcrossCode = "destElem += srcElem1;" 944 twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), 945 2, addAcrossCode) 946 twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, 947 
addAcrossCode) 948 # AND 949 andCode = "destElem = srcElem1 & srcElem2;" 950 threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode) 951 threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode) 952 # BIC (immediate) 953 bicImmCode = "destElem &= ~imm;" 954 oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, 955 bicImmCode, True) 956 oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, 957 bicImmCode, True) 958 # BIC (register) 959 bicCode = "destElem = srcElem1 & ~srcElem2;" 960 threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode) 961 threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode) 962 # BIF 963 bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);" 964 threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, 965 True) 966 threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, 967 True) 968 # BIT 969 bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);" 970 threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, 971 True) 972 threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, 973 True) 974 # BSL 975 bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);" 976 threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, 977 True) 978 threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, 979 True) 980 # CLS 981 clsCode = ''' 982 unsigned count = 0; 983 if (srcElem1 < 0) { 984 srcElem1 <<= 1; 985 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 986 count++; 987 srcElem1 <<= 1; 988 } 989 } else { 990 srcElem1 <<= 1; 991 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 992 count++; 993 srcElem1 <<= 1; 994 } 995 } 996 destElem = count; 997 ''' 998 twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode) 999 twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", 
smallSignedTypes, 4, clsCode) 1000 # CLZ 1001 clzCode = ''' 1002 unsigned count = 0; 1003 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 1004 count++; 1005 srcElem1 <<= 1; 1006 } 1007 destElem = count; 1008 ''' 1009 twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode) 1010 twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode) 1011 # CMEQ (register) 1012 cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;" 1013 threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, 1014 cmeqCode) 1015 threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, 1016 cmeqCode) 1017 # CMEQ (zero) 1018 cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;" 1019 twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, 1020 cmeqZeroCode) 1021 twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, 1022 cmeqZeroCode) 1023 # CMGE (register) 1024 cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;" 1025 threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode) 1026 threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode) 1027 # CMGE (zero) 1028 cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;" 1029 twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, 1030 cmgeZeroCode) 1031 twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, 1032 cmgeZeroCode) 1033 # CMGT (register) 1034 cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;" 1035 threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode) 1036 threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode) 1037 # CMGT (zero) 1038 cmgtZeroCode = "destElem = (srcElem1 > 0) ? 
(Element)(-1) : 0;" 1039 twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, 1040 cmgtZeroCode) 1041 twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, 1042 cmgtZeroCode) 1043 # CMHI (register) 1044 threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, 1045 cmgtCode) 1046 threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, 1047 cmgtCode) 1048 # CMHS (register) 1049 threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, 1050 cmgeCode) 1051 threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, 1052 cmgeCode) 1053 # CMLE (zero) 1054 cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;" 1055 twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, 1056 cmleZeroCode) 1057 twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, 1058 cmleZeroCode) 1059 # CMLT (zero) 1060 cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;" 1061 twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, 1062 cmltZeroCode) 1063 twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, 1064 cmltZeroCode) 1065 # CMTST (register) 1066 tstCode = "destElem = (srcElem1 & srcElem2) ? 
(Element)(-1) : 0;" 1067 threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, 1068 tstCode) 1069 threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, 1070 tstCode) 1071 # CNT 1072 cntCode = ''' 1073 unsigned count = 0; 1074 while (srcElem1 && count < sizeof(Element) * 8) { 1075 count += srcElem1 & 0x1; 1076 srcElem1 >>= 1; 1077 } 1078 destElem = count; 1079 ''' 1080 twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode) 1081 twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode) 1082 # DUP (element) 1083 dupCode = "destElem = srcElem1;" 1084 twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, 1085 dupCode, isDup=True, byElem=True) 1086 twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, 1087 dupCode, isDup=True, byElem=True) 1088 twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, 1089 dupCode, isDup=True, byElem=True, scalar=True) 1090 # DUP (general register) 1091 dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W') 1092 dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 1093 dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X') 1094 # EOR 1095 eorCode = "destElem = srcElem1 ^ srcElem2;" 1096 threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode) 1097 threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode) 1098 # EXT 1099 extCode = ''' 1100 for (unsigned i = 0; i < eCount; i++) { 1101 unsigned index = i + imm; 1102 if (index < eCount) { 1103 destReg.elements[i] = srcReg1.elements[index]; 1104 } else { 1105 index -= eCount; 1106 if (index >= eCount) { 1107 fault = std::make_shared<UndefinedInstruction>( 1108 machInst, false, mnemonic); 1109 } else { 1110 destReg.elements[i] = srcReg2.elements[index]; 1111 } 1112 } 1113 } 1114 ''' 1115 extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode) 1116 extInstX("Ext", "ExtQX", "SimdMiscOp", 
("uint8_t",), 4, extCode) 1117 # FABD 1118 fpOp = ''' 1119 FPSCR fpscr = (FPSCR) FpscrExc; 1120 destElem = %s; 1121 FpscrExc = fpscr; 1122 ''' 1123 fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))" 1124 threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2, 1125 fabdCode) 1126 threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4, 1127 fabdCode) 1128 threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4, 1129 fabdCode, scalar=True) 1130 # FABS 1131 fabsCode = fpOp % "fplibAbs<Element>(srcElem1)" 1132 twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2, 1133 fabsCode) 1134 twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4, 1135 fabsCode) 1136 # FACGE 1137 fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1)," 1138 " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0") 1139 facgeCode = fpCmpAbsOp % "GE" 1140 threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes, 1141 2, facgeCode) 1142 threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4, 1143 facgeCode) 1144 threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4, 1145 facgeCode, scalar=True) 1146 # FACGT 1147 facgtCode = fpCmpAbsOp % "GT" 1148 threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes, 1149 2, facgtCode) 1150 threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4, 1151 facgtCode) 1152 threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4, 1153 facgtCode, scalar=True) 1154 # FADD 1155 fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)" 1156 faddCode = fpBinOp % "Add" 1157 threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2, 1158 faddCode) 1159 threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, 1160 faddCode) 1161 # FADDP (scalar) 1162 twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp", 1163 ("uint32_t",), 2, 
faddCode) 1164 twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp", 1165 ("uint64_t",), 4, faddCode) 1166 # FADDP (vector) 1167 threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, 1168 2, faddCode, pairwise=True) 1169 threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4, 1170 faddCode, pairwise=True) 1171 # FCMEQ (register) 1172 fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?" 1173 " -1 : 0") 1174 fcmeqCode = fpCmpOp % "EQ" 1175 threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, 1176 2, fcmeqCode) 1177 threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, 1178 fcmeqCode) 1179 threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, 1180 fcmeqCode, scalar=True) 1181 # FCMEQ (zero) 1182 fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0" 1183 fcmeqZeroCode = fpCmpZeroOp % "EQ" 1184 twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1185 2, fcmeqZeroCode) 1186 twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1187 fcmeqZeroCode) 1188 twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1189 fcmeqZeroCode, scalar=True) 1190 # FCMGE (register) 1191 fcmgeCode = fpCmpOp % "GE" 1192 threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, 1193 2, fcmgeCode) 1194 threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, 1195 fcmgeCode) 1196 threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, 1197 fcmgeCode, scalar=True) 1198 # FCMGE (zero) 1199 fcmgeZeroCode = fpCmpZeroOp % "GE" 1200 twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1201 2, fcmgeZeroCode) 1202 twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1203 fcmgeZeroCode) 1204 twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1205 fcmgeZeroCode, scalar=True) 1206 # FCMGT 
(register) 1207 fcmgtCode = fpCmpOp % "GT" 1208 threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, 1209 2, fcmgtCode) 1210 threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4, 1211 fcmgtCode) 1212 threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4, 1213 fcmgtCode, scalar=True) 1214 # FCMGT (zero) 1215 fcmgtZeroCode = fpCmpZeroOp % "GT" 1216 twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1217 2, fcmgtZeroCode) 1218 twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1219 fcmgtZeroCode) 1220 twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1221 fcmgtZeroCode, scalar=True) 1222 # FCMLE (zero) 1223 fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?" 1224 " -1 : 0") 1225 fcmleZeroCode = fpCmpRevZeroOp % "GE" 1226 twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1227 2, fcmleZeroCode) 1228 twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1229 fcmleZeroCode) 1230 twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1231 fcmleZeroCode, scalar=True) 1232 # FCMLT (zero) 1233 fcmltZeroCode = fpCmpRevZeroOp % "GT" 1234 twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1235 2, fcmltZeroCode) 1236 twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1237 fcmltZeroCode) 1238 twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1239 fcmltZeroCode, scalar=True) 1240 # FCVTAS 1241 fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>(" 1242 "srcElem1, %s, %s, %s, fpscr)") 1243 fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY") 1244 twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2, 1245 fcvtasCode) 1246 twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4, 1247 fcvtasCode) 1248 twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 
4, 1249 fcvtasCode, scalar=True) 1250 # FCVTAU 1251 fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY") 1252 twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2, 1253 fcvtauCode) 1254 twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4, 1255 fcvtauCode) 1256 twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4, 1257 fcvtauCode, scalar=True) 1258 # FCVTL, FCVTL2 1259 fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>(" 1260 "srcElem1, FPCRRounding(fpscr), fpscr)") 1261 twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"), 1262 fcvtlCode) 1263 twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"), 1264 fcvtlCode, hi=True) 1265 # FCVTMS 1266 fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF") 1267 twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2, 1268 fcvtmsCode) 1269 twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4, 1270 fcvtmsCode) 1271 twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4, 1272 fcvtmsCode, scalar=True) 1273 # FCVTMU 1274 fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF") 1275 twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2, 1276 fcvtmuCode) 1277 twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4, 1278 fcvtmuCode) 1279 twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4, 1280 fcvtmuCode, scalar=True) 1281 # FCVTN, FCVTN2 1282 fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>(" 1283 "srcElem1, FPCRRounding(fpscr), fpscr)") 1284 twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", 1285 ("uint16_t", "uint32_t"), fcvtnCode) 1286 twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", 1287 ("uint16_t", "uint32_t"), fcvtnCode, hi=True) 1288 # FCVTNS 1289 fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN") 1290 twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2, 1291 fcvtnsCode) 1292 twoEqualRegInstX("fcvtns", 
"FcvtnsQX", "SimdCvtOp", floatTypes, 4, 1293 fcvtnsCode) 1294 twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4, 1295 fcvtnsCode, scalar=True) 1296 # FCVTNU 1297 fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN") 1298 twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2, 1299 fcvtnuCode) 1300 twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4, 1301 fcvtnuCode) 1302 twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4, 1303 fcvtnuCode, scalar=True) 1304 # FCVTPS 1305 fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF") 1306 twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2, 1307 fcvtpsCode) 1308 twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4, 1309 fcvtpsCode) 1310 twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4, 1311 fcvtpsCode, scalar=True) 1312 # FCVTPU 1313 fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF") 1314 twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2, 1315 fcvtpuCode) 1316 twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4, 1317 fcvtpuCode) 1318 twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4, 1319 fcvtpuCode, scalar=True) 1320 # FCVTXN, FCVTXN2 1321 fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>(" 1322 "srcElem1, FPRounding_ODD, fpscr)") 1323 twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes, 1324 fcvtxnCode) 1325 twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes, 1326 fcvtxnCode, hi=True) 1327 twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes, 1328 fcvtxnCode, scalar=True) 1329 # FCVTZS (fixed-point) 1330 fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO") 1331 twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes, 1332 2, fcvtzsCode, hasImm=True) 1333 twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4, 1334 fcvtzsCode, hasImm=True) 1335 
    twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
                     fcvtzsCode, hasImm=True, scalar=True)
    # FCVTZS (integer)
    # Same conversion as the fixed-point form, with "0" in place of imm.
    fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
    twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes,
                     2, fcvtzsIntCode)
    twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4,
                     fcvtzsIntCode)
    twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
                     fcvtzsIntCode, scalar=True)
    # FCVTZU (fixed-point)
    fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
    twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes,
                     2, fcvtzuCode, hasImm=True)
    twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4,
                     fcvtzuCode, hasImm=True)
    twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
                     fcvtzuCode, hasImm=True, scalar=True)
    # FCVTZU (integer)
    fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
    twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2,
                     fcvtzuIntCode)
    twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4,
                     fcvtzuIntCode)
    twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
                     fcvtzuIntCode, scalar=True)
    # FDIV
    # fpBinOp expands to fplib<Suffix><Element>(srcElem1, srcElem2, fpscr).
    fdivCode = fpBinOp % "Div"
    threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2,
                       fdivCode)
    threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4,
                       fdivCode)
    # FMAX
    fmaxCode = fpBinOp % "Max"
    threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                       fmaxCode)
    threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4,
                       fmaxCode)
    # FMAXNM
    fmaxnmCode = fpBinOp % "MaxNum"
    threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes,
                       2, fmaxnmCode)
    threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4,
                       fmaxnmCode)
    # FMAXNMP (scalar)
    twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp",
                          ("uint32_t",), 2, fmaxnmCode)
    twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp",
                          ("uint64_t",), 4, fmaxnmCode)
    # FMAXNMP (vector)
    threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp",
                       smallFloatTypes, 2, fmaxnmCode, pairwise=True)
    threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                       fmaxnmCode, pairwise=True)
    # FMAXNMV
    # Note: SimdFloatCmpOp can be a bit optimistic here
    # fpAcrossOp folds each source element into destElem:
    # destElem = fplib<Suffix><Element>(destElem, srcElem1, fpscr).
    fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
    fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
    twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                      4, fmaxnmAcrossCode)
    # FMAXP (scalar)
    twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp",
                          ("uint32_t",), 2, fmaxCode)
    twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp",
                          ("uint64_t",), 4, fmaxCode)
    # FMAXP (vector)
    threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes,
                       2, fmaxCode, pairwise=True)
    threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4,
                       fmaxCode, pairwise=True)
    # FMAXV
    # Note: SimdFloatCmpOp can be a bit optimistic here
    fmaxAcrossCode = fpAcrossOp % "Max"
    twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                      fmaxAcrossCode)
    # FMIN
    fminCode = fpBinOp % "Min"
    threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                       fminCode)
    threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4,
                       fminCode)
    # FMINNM
    fminnmCode = fpBinOp % "MinNum"
    threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes,
                       2, fminnmCode)
    threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4,
                       fminnmCode)
    # FMINNMP (scalar)
    twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp",
                          ("uint32_t",), 2, fminnmCode)
    twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp",
                          ("uint64_t",), 4, fminnmCode)
    # FMINNMP (vector)
    threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp",
                       smallFloatTypes, 2, fminnmCode, pairwise=True)
    threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                       fminnmCode, pairwise=True)
    # FMINNMV
    # Note: SimdFloatCmpOp can be a bit optimistic here
    fminnmAcrossCode = fpAcrossOp % "MinNum"
    twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                      4, fminnmAcrossCode)
    # FMINP (scalar)
    twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp",
                          ("uint32_t",), 2, fminCode)
    twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp",
                          ("uint64_t",), 4, fminCode)
    # FMINP (vector)
    threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes,
                       2, fminCode, pairwise=True)
    threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4,
                       fminCode, pairwise=True)
    # FMINV
    # Note: SimdFloatCmpOp can be a bit optimistic here
    fminAcrossCode = fpAcrossOp % "Min"
    twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                      fminAcrossCode)
    # FMLA (by element)
    # destElem is an input of the multiply-add, so every FMLA call passes
    # readDest=True (the positional True after the code argument).
    fmlaCode = fpOp % ("fplibMulAdd<Element>("
                       "destElem, srcElem1, srcElem2, fpscr)")
    threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp",
                       smallFloatTypes, 2, fmlaCode, True, byElem=True)
    threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes,
                       4, fmlaCode, True, byElem=True)
    threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
                       4, fmlaCode, True, byElem=True, scalar=True)
    # FMLA (vector)
    threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
                       2, fmlaCode, True)
    threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
                       fmlaCode, True)
    # FMLS (by element)
    # Same fused multiply-add as FMLA, with the first multiplicand negated
    # through fplibNeg; destElem is read (readDest=True) as the addend.
    fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
                       " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
    threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp",
                       smallFloatTypes, 2, fmlsCode, True, byElem=True)
    threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes,
                       4, fmlsCode, True, byElem=True)
    threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
                       4, fmlsCode, True, byElem=True, scalar=True)
    # FMLS (vector)
    threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes,
                       2, fmlsCode, True)
    threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4,
                       fmlsCode, True)
    # FMOV
    # The immediate is copied into each destination element as-is.
    fmovCode = 'destElem = imm;'
    oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2,
                   fmovCode)
    oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode)
    # FMUL (by element)
    fmulCode = fpBinOp % "Mul"
    threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp",
                       smallFloatTypes, 2, fmulCode, byElem=True)
    threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4,
                       fmulCode, byElem=True)
    threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
                       fmulCode, byElem=True, scalar=True)
    # FMUL (vector)
    threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2,
                       fmulCode)
    threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4,
                       fmulCode)
    # FMULX
    fmulxCode = fpBinOp % "MulX"
    threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes,
                       2, fmulxCode)
    threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4,
                       fmulxCode)
    threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
                       fmulxCode, scalar=True)
1506 # FMULX (by element) 1507 threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp", 1508 smallFloatTypes, 2, fmulxCode, byElem=True) 1509 threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes, 1510 4, fmulxCode, byElem=True) 1511 threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes, 1512 4, fmulxCode, byElem=True, scalar=True) 1513 # FNEG 1514 fnegCode = fpOp % "fplibNeg<Element>(srcElem1)" 1515 twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2, 1516 fnegCode) 1517 twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4, 1518 fnegCode) 1519 # FRECPE 1520 frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)" 1521 twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp", 1522 smallFloatTypes, 2, frecpeCode) 1523 twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4, 1524 frecpeCode) 1525 twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes, 1526 4, frecpeCode, scalar=True) 1527 # FRECPS 1528 frecpsCode = fpBinOp % "RecipStepFused" 1529 threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp", 1530 smallFloatTypes, 2, frecpsCode) 1531 threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes, 1532 4, frecpsCode) 1533 threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes, 1534 4, frecpsCode, scalar=True) 1535 # FRECPX 1536 frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)" 1537 twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4, 1538 frecpxCode, scalar=True) 1539 # FRINTA 1540 frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)" 1541 frintaCode = frintCode % ("FPRounding_TIEAWAY", "false") 1542 twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2, 1543 frintaCode) 1544 twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4, 1545 frintaCode) 1546 # FRINTI 1547 frintiCode = frintCode % ("FPCRRounding(fpscr)", "false") 
    twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintiCode)
    twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4,
                     frintiCode)
    # FRINTM
    frintmCode = frintCode % ("FPRounding_NEGINF", "false")
    twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintmCode)
    twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4,
                     frintmCode)
    # FRINTN
    frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
    twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintnCode)
    twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4,
                     frintnCode)
    # FRINTP
    frintpCode = frintCode % ("FPRounding_POSINF", "false")
    twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintpCode)
    twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4,
                     frintpCode)
    # FRINTX
    # Only FRINTX passes "true" as the second substituted argument.
    frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
    twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintxCode)
    twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4,
                     frintxCode)
    # FRINTZ
    frintzCode = frintCode % ("FPRounding_ZERO", "false")
    twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2,
                     frintzCode)
    twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4,
                     frintzCode)
    # FRSQRTE
    frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
    twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp",
                     smallFloatTypes, 2, frsqrteCode)
    twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4,
                     frsqrteCode)
    twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
                     frsqrteCode, scalar=True)
    # FRSQRTS
    frsqrtsCode = fpBinOp % "RSqrtStepFused"
    threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp",
                       smallFloatTypes, 2, frsqrtsCode)
    threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes,
                       4, frsqrtsCode)
    threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
                       4, frsqrtsCode, scalar=True)
    # FSQRT
    fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
    twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2,
                     fsqrtCode)
    twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4,
                     fsqrtCode)
    # FSUB
    fsubCode = fpBinOp % "Sub"
    threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2,
                       fsubCode)
    threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4,
                       fsubCode)
    # INS (element)
    insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
    # INS (general register)
    insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4,
                    'W')
    insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X')
    # MLA (by element)
    # destElem is accumulated into, so all MLA/MLS calls pass readDest=True
    # (the positional True after the code argument).
    mlaCode = "destElem += srcElem1 * srcElem2;"
    threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True)
    threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True)
    # MLA (vector)
    threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                       mlaCode, True)
    threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                       mlaCode, True)
    # MLS (by element)
    mlsCode = "destElem -= srcElem1 * srcElem2;"
    threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True)
    threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True)
    # MLS (vector)
    threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                       mlsCode, True)
    threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                       mlsCode, True)
    # MOV (element) -> alias to INS (element)
    # MOV (from general) -> alias to INS (general register)
    # MOV (scalar) -> alias to DUP (element)
    # MOV (to general) -> alias to UMOV
    # MOV (vector) -> alias to ORR (register)
    # MOVI
    movImmCode = "destElem = imm;"
    oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2,
                   movImmCode)
    oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4,
                   movImmCode)
    # MUL (by element)
    mulCode = "destElem = srcElem1 * srcElem2;"
    threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp",
                       ("uint16_t", "uint32_t"), 2, mulCode, byElem=True)
    threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp",
                       ("uint16_t", "uint32_t"), 4, mulCode, byElem=True)
    # MUL (vector)
    threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2,
                       mulCode)
    threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4,
                       mulCode)
    # MVN
    mvnCode = "destElem = ~srcElem1;"
    twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode)
    twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode)
    # MVNI
    mvniCode = "destElem = ~imm;"
    oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode)
    oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode)
    # NEG
    negCode = "destElem = -srcElem1;"
    twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode)
    twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode)
    # NOT -> alias to MVN
    # ORN
    ornCode = "destElem = srcElem1 | ~srcElem2;"
    threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode)
    threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode)
    # ORR (immediate)
orrImmCode = "destElem |= imm;" 1679 oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2, 1680 orrImmCode, True) 1681 oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4, 1682 orrImmCode, True) 1683 # ORR (register) 1684 orrCode = "destElem = srcElem1 | srcElem2;" 1685 threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode) 1686 threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode) 1687 # PMUL 1688 pmulCode = ''' 1689 destElem = 0; 1690 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 1691 if (bits(srcElem2, j)) 1692 destElem ^= srcElem1 << j; 1693 } 1694 ''' 1695 threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2, 1696 pmulCode) 1697 threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4, 1698 pmulCode) 1699 # PMULL, PMULL2 1700 # Note: 64-bit PMULL is not available (Crypto. Extension) 1701 pmullCode = ''' 1702 destElem = 0; 1703 for (unsigned j = 0; j < sizeof(Element) * 8; j++) { 1704 if (bits(srcElem2, j)) 1705 destElem ^= (BigElement)srcElem1 << j; 1706 } 1707 ''' 1708 threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode) 1709 threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",), 1710 pmullCode, hi=True) 1711 # RADDHN, RADDHN2 1712 raddhnCode = ''' 1713 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + 1714 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> 1715 (sizeof(Element) * 8); 1716 ''' 1717 threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes, 1718 raddhnCode) 1719 threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes, 1720 raddhnCode, hi=True) 1721 # RBIT 1722 rbitCode = ''' 1723 destElem = 0; 1724 Element temp = srcElem1; 1725 for (int i = 0; i < 8 * sizeof(Element); i++) { 1726 destElem = destElem | ((temp & 0x1) << 1727 (8 * sizeof(Element) - 1 - i)); 1728 temp >>= 1; 1729 } 1730 ''' 1731 twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode) 1732 
# RBIT with rCount = 4; uses the rbitCode snippet defined just before this.
twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)

# (class-name suffix, rCount) pairs for instructions registered in both a
# "DX" and a "QX" flavour.  Always iterated D first, then Q, so the order in
# which the generators are invoked stays exactly as before.
dqRegForms = (("DX", 2), ("QX", 4))

# REV16, REV32, REV64
# The three snippets differ only in the group-size shift (1, 2 or 3).  The
# element index i is XORed with (groupSize - 1) and stored in j (presumably
# the destination index used by the generator -- TODO confirm).
revCodeTemplate = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << %d) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
'''
rev16Code = revCodeTemplate % 1
rev32Code = revCodeTemplate % 2
rev64Code = revCodeTemplate % 3
for revMnemonic, revSnippet, revElemTypes in (
        ("rev16", rev16Code, ("uint8_t",)),
        ("rev32", rev32Code, ("uint8_t", "uint16_t")),
        ("rev64", rev64Code, smallUnsignedTypes)):
    for formSuffix, formRegs in dqRegForms:
        twoEqualRegInstX(revMnemonic, revMnemonic.capitalize() + formSuffix,
                         "SimdAluOp", revElemTypes, formRegs, revSnippet)

# RSHRN, RSHRN2
# Rounding shift right: the last bit shifted out (bit imm - 1) is added back
# to the shifted value.  An imm larger than the source width gives zero.
rshrnCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
'''
twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True)
twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True, hi=True)

# RSUBHN, RSUBHN2
# Difference in BigElement precision plus a rounding constant of
# 1 << (element bits - 1); the upper half of the result is kept.
rsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
                    rsubhnCode)
threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
                    rsubhnCode, hi=True)

# SABA
# Absolute difference accumulated into the destination, so the destination
# has to be read.
abaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
'''
for formSuffix, formRegs in dqRegForms:
    threeEqualRegInstX("saba", "Saba" + formSuffix, "SimdAddAccOp",
                       smallSignedTypes, formRegs, abaCode, readDest=True)

# SABAL, SABAL2
# As SABA, with the difference computed in BigElement precision.
abalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True)
threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True, hi=True)

# SABD
# Absolute difference without accumulation.
abdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
'''
for formSuffix, formRegs in dqRegForms:
    threeEqualRegInstX("sabd", "Sabd" + formSuffix, "SimdAddOp",
                       smallSignedTypes, formRegs, abdCode)

# SABDL, SABDL2
# NOTE(review): abdlCode overwrites destElem, yet these are registered with
# the accumulate op class and a trailing True like SABAL -- confirm whether
# that is intended before changing it.
abdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True)
threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True, hi=True)

# SADALP
# Widened sum of the two source elements, accumulated into the destination.
adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
for formSuffix, formRegs in dqRegForms:
    twoRegCondenseInstX("sadalp", "Sadalp" + formSuffix, "SimdAddOp",
                        smallSignedTypes, formRegs, adalpCode, True)

# SADDL, SADDL2
# addlwCode is shared by the long (SADDL), pairwise (SADDLP) and wide
# (SADDW) additions registered below.
# NOTE(review): SADDL/SADDW use "SimdAddAccOp" although addlwCode does not
# accumulate -- confirm the op class is intended.
addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)

# SADDLP
for formSuffix, formRegs in dqRegForms:
    twoRegCondenseInstX("saddlp", "Saddlp" + formSuffix, "SimdAddOp",
                        smallSignedTypes, formRegs, addlwCode)

# SADDLV
# Note: SimdAddOp can be a bit optimistic here
addAcrossLongCode = "destElem += (BigElement)srcElem1;"
for formSuffix, formRegs in dqRegForms:
    twoRegAcrossInstX("saddlv", "Saddlv" + formSuffix, "SimdAddOp",
                      ("int8_t", "int16_t"), formRegs, addAcrossLongCode,
                      long=True)
# The int32_t form is registered separately because it also passes
# doubleDest=True.
twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)

# SADDW, SADDW2
threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)

# SCVTF (fixed-point)
# The "%d" placeholder survives the substitution into fpOp and is filled in
# later with the source integer width (32 or 64) at each registration.
scvtfFixedExpr = ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
                  " false, FPCRRounding(fpscr), fpscr)")
scvtfFixedCode = fpOp % scvtfFixedExpr
twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, 1864 scvtfFixedCode % 32, hasImm=True) 1865 twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4, 1866 scvtfFixedCode % 32, hasImm=True) 1867 twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4, 1868 scvtfFixedCode % 64, hasImm=True) 1869 twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes, 1870 4, scvtfFixedCode % 32, hasImm=True, scalar=True) 1871 twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4, 1872 scvtfFixedCode % 64, hasImm=True, scalar=True) 1873 # SCVTF (integer) 1874 scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0," 1875 " false, FPCRRounding(fpscr), fpscr)") 1876 twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, 1877 scvtfIntCode % 32) 1878 twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4, 1879 scvtfIntCode % 32) 1880 twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4, 1881 scvtfIntCode % 64) 1882 twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4, 1883 scvtfIntCode % 32, scalar=True) 1884 twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4, 1885 scvtfIntCode % 64, scalar=True) 1886 # SHADD 1887 haddCode = ''' 1888 Element carryBit = 1889 (((unsigned)srcElem1 & 0x1) + 1890 ((unsigned)srcElem2 & 0x1)) >> 1; 1891 // Use division instead of a shift to ensure the sign extension works 1892 // right. The compiler will figure out if it can be a shift. Mask the 1893 // inputs so they get truncated correctly. 
1894 destElem = (((srcElem1 & ~(Element)1) / 2) + 1895 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 1896 ''' 1897 threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2, 1898 haddCode) 1899 threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4, 1900 haddCode) 1901 # SHL 1902 shlCode = ''' 1903 if (imm >= sizeof(Element) * 8) 1904 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; 1905 else 1906 destElem = srcElem1 << imm; 1907 ''' 1908 twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode, 1909 hasImm=True) 1910 twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode, 1911 hasImm=True) 1912 # SHLL, SHLL2 1913 shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);" 1914 twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode) 1915 twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode, 1916 hi=True) 1917 # SHRN, SHRN2 1918 shrnCode = ''' 1919 if (imm >= sizeof(srcElem1) * 8) { 1920 destElem = 0; 1921 } else { 1922 destElem = srcElem1 >> imm; 1923 } 1924 ''' 1925 twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes, 1926 shrnCode, hasImm=True) 1927 twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes, 1928 shrnCode, hasImm=True, hi=True) 1929 # SHSUB 1930 hsubCode = ''' 1931 Element borrowBit = 1932 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; 1933 // Use division instead of a shift to ensure the sign extension works 1934 // right. The compiler will figure out if it can be a shift. Mask the 1935 // inputs so they get truncated correctly. 
1936 destElem = (((srcElem1 & ~(Element)1) / 2) - 1937 ((srcElem2 & ~(Element)1) / 2)) - borrowBit; 1938 ''' 1939 threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2, 1940 hsubCode) 1941 threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4, 1942 hsubCode) 1943 # SLI 1944 sliCode = ''' 1945 if (imm >= sizeof(Element) * 8) 1946 destElem = destElem; 1947 else 1948 destElem = (srcElem1 << imm) | (destElem & mask(imm)); 1949 ''' 1950 twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode, 1951 True, hasImm=True) 1952 twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode, 1953 True, hasImm=True) 1954 # SMAX 1955 maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;" 1956 threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2, 1957 maxCode) 1958 threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4, 1959 maxCode) 1960 # SMAXP 1961 threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2, 1962 maxCode, pairwise=True) 1963 threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4, 1964 maxCode, pairwise=True) 1965 # SMAXV 1966 maxAcrossCode = ''' 1967 if (i == 0 || srcElem1 > destElem) 1968 destElem = srcElem1; 1969 ''' 1970 twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"), 1971 2, maxAcrossCode) 1972 twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4, 1973 maxAcrossCode) 1974 # SMIN 1975 minCode = "destElem = (srcElem1 < srcElem2) ? 
srcElem1 : srcElem2;" 1976 threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2, 1977 minCode) 1978 threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4, 1979 minCode) 1980 # SMINP 1981 threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2, 1982 minCode, pairwise=True) 1983 threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4, 1984 minCode, pairwise=True) 1985 # SMINV 1986 minAcrossCode = ''' 1987 if (i == 0 || srcElem1 < destElem) 1988 destElem = srcElem1; 1989 ''' 1990 twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"), 1991 2, minAcrossCode) 1992 twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4, 1993 minAcrossCode) 1994 1995 split('exec') 1996 1997 # SMLAL, SMLAL2 (by element) 1998 mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;" 1999 threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp", 2000 ("int16_t", "int32_t"), mlalCode, True, byElem=True) 2001 threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp", 2002 ("int16_t", "int32_t"), mlalCode, True, byElem=True, 2003 hi=True) 2004 # SMLAL, SMLAL2 (vector) 2005 threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes, 2006 mlalCode, True) 2007 threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes, 2008 mlalCode, True, hi=True) 2009 # SMLSL, SMLSL2 (by element) 2010 mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;" 2011 threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes, 2012 mlslCode, True, byElem=True) 2013 threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp", 2014 smallSignedTypes, mlslCode, True, byElem=True, hi=True) 2015 # SMLSL, SMLSL2 (vector) 2016 threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes, 2017 mlslCode, True) 2018 threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes, 2019 mlslCode, True, hi=True) 2020 # SMOV 2021 insToGprInstX("smov", 
"SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4, 2022 'W', True) 2023 insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X', 2024 True) 2025 # SMULL, SMULL2 (by element) 2026 mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;" 2027 threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes, 2028 mullCode, byElem=True) 2029 threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes, 2030 mullCode, byElem=True, hi=True) 2031 # SMULL, SMULL2 (vector) 2032 threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes, 2033 mullCode) 2034 threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes, 2035 mullCode, hi=True) 2036 # SQABS 2037 sqabsCode = ''' 2038 FPSCR fpscr = (FPSCR) FpscrQc; 2039 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2040 fpscr.qc = 1; 2041 destElem = ~srcElem1; 2042 } else if (srcElem1 < 0) { 2043 destElem = -srcElem1; 2044 } else { 2045 destElem = srcElem1; 2046 } 2047 FpscrQc = fpscr; 2048 ''' 2049 twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2, 2050 sqabsCode) 2051 twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4, 2052 sqabsCode) 2053 twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4, 2054 sqabsCode, scalar=True) 2055 # SQADD 2056 sqaddCode = ''' 2057 destElem = srcElem1 + srcElem2; 2058 FPSCR fpscr = (FPSCR) FpscrQc; 2059 bool negDest = (destElem < 0); 2060 bool negSrc1 = (srcElem1 < 0); 2061 bool negSrc2 = (srcElem2 < 0); 2062 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { 2063 destElem = std::numeric_limits<Element>::min(); 2064 if (negDest) 2065 destElem -= 1; 2066 fpscr.qc = 1; 2067 } 2068 FpscrQc = fpscr; 2069 ''' 2070 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2, 2071 sqaddCode) 2072 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4, 2073 sqaddCode) 2074 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4, 2075 sqaddCode, 
scalar=True) 2076 # SQDMLAL, SQDMLAL2 (by element) 2077 qdmlalCode = ''' 2078 FPSCR fpscr = (FPSCR) FpscrQc; 2079 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2080 Element maxNeg = std::numeric_limits<Element>::min(); 2081 Element halfNeg = maxNeg / 2; 2082 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2083 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2084 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2085 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2086 fpscr.qc = 1; 2087 } 2088 bool negPreDest = ltz(destElem); 2089 destElem += midElem; 2090 bool negDest = ltz(destElem); 2091 bool negMid = ltz(midElem); 2092 if (negPreDest == negMid && negMid != negDest) { 2093 destElem = mask(sizeof(BigElement) * 8 - 1); 2094 if (negPreDest) 2095 destElem = ~destElem; 2096 fpscr.qc = 1; 2097 } 2098 FpscrQc = fpscr; 2099 ''' 2100 threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp", 2101 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True) 2102 threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp", 2103 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, 2104 hi=True) 2105 threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp", 2106 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, 2107 scalar=True) 2108 # SQDMLAL, SQDMLAL2 (vector) 2109 threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp", 2110 ("int16_t", "int32_t"), qdmlalCode, True) 2111 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp", 2112 ("int16_t", "int32_t"), qdmlalCode, True, hi=True) 2113 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp", 2114 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True) 2115 # SQDMLSL, SQDMLSL2 (by element) 2116 qdmlslCode = ''' 2117 FPSCR fpscr = (FPSCR) FpscrQc; 2118 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2119 Element maxNeg = std::numeric_limits<Element>::min(); 2120 Element halfNeg = maxNeg / 2; 2121 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2122 (srcElem1 
== halfNeg && srcElem2 == maxNeg) || 2123 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2124 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); 2125 fpscr.qc = 1; 2126 } 2127 bool negPreDest = ltz(destElem); 2128 destElem -= midElem; 2129 bool negDest = ltz(destElem); 2130 bool posMid = ltz((BigElement)-midElem); 2131 if (negPreDest == posMid && posMid != negDest) { 2132 destElem = mask(sizeof(BigElement) * 8 - 1); 2133 if (negPreDest) 2134 destElem = ~destElem; 2135 fpscr.qc = 1; 2136 } 2137 FpscrQc = fpscr; 2138 ''' 2139 threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp", 2140 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True) 2141 threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp", 2142 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, 2143 hi=True) 2144 threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp", 2145 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, 2146 scalar=True) 2147 # SQDMLSL, SQDMLSL2 (vector) 2148 threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp", 2149 ("int16_t", "int32_t"), qdmlslCode, True) 2150 threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp", 2151 ("int16_t", "int32_t"), qdmlslCode, True, hi=True) 2152 threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp", 2153 ("int16_t", "int32_t"), qdmlslCode, True, scalar=True) 2154 # SQDMULH (by element) 2155 sqdmulhCode = ''' 2156 FPSCR fpscr = (FPSCR) FpscrQc; 2157 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> 2158 (sizeof(Element) * 8); 2159 if (srcElem1 == srcElem2 && 2160 srcElem1 == (Element)((Element)1 << 2161 (sizeof(Element) * 8 - 1))) { 2162 destElem = ~srcElem1; 2163 fpscr.qc = 1; 2164 } 2165 FpscrQc = fpscr; 2166 ''' 2167 threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp", 2168 ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True) 2169 threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp", 2170 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True) 2171 threeEqualRegInstX("sqdmulh", 
"SqdmulhElemScX", "SimdMultOp", 2172 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, 2173 scalar=True) 2174 # SQDMULH (vector) 2175 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", 2176 ("int16_t", "int32_t"), 2, sqdmulhCode) 2177 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", 2178 ("int16_t", "int32_t"), 4, sqdmulhCode) 2179 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", 2180 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) 2181 # SQDMULL, SQDMULL2 (by element) 2182 qdmullCode = ''' 2183 FPSCR fpscr = (FPSCR) FpscrQc; 2184 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2185 if (srcElem1 == srcElem2 && 2186 srcElem1 == (Element)((Element)1 << 2187 (Element)(sizeof(Element) * 8 - 1))) { 2188 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); 2189 fpscr.qc = 1; 2190 } 2191 FpscrQc = fpscr; 2192 ''' 2193 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", 2194 ("int16_t", "int32_t"), qdmullCode, True, byElem=True) 2195 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", 2196 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2197 hi=True) 2198 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", 2199 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2200 scalar=True) 2201 # SQDMULL, SQDMULL2 (vector) 2202 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", 2203 ("int16_t", "int32_t"), qdmullCode, True) 2204 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", 2205 ("int16_t", "int32_t"), qdmullCode, True, hi=True) 2206 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", 2207 ("int16_t", "int32_t"), qdmullCode, True, scalar=True) 2208 # SQNEG 2209 sqnegCode = ''' 2210 FPSCR fpscr = (FPSCR) FpscrQc; 2211 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) { 2212 fpscr.qc = 1; 2213 destElem = ~srcElem1; 2214 } else { 2215 destElem = -srcElem1; 2216 } 2217 FpscrQc = fpscr; 2218 ''' 2219 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2, 2220 
sqnegCode) 2221 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, 2222 sqnegCode) 2223 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, 2224 sqnegCode, scalar=True) 2225 # SQRDMULH (by element) 2226 sqrdmulhCode = ''' 2227 FPSCR fpscr = (FPSCR) FpscrQc; 2228 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + 2229 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2230 (sizeof(Element) * 8); 2231 Element maxNeg = std::numeric_limits<Element>::min(); 2232 Element halfNeg = maxNeg / 2; 2233 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2234 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2235 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2236 if (destElem < 0) { 2237 destElem = mask(sizeof(Element) * 8 - 1); 2238 } else { 2239 destElem = std::numeric_limits<Element>::min(); 2240 } 2241 fpscr.qc = 1; 2242 } 2243 FpscrQc = fpscr; 2244 ''' 2245 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", 2246 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) 2247 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", 2248 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) 2249 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", 2250 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, 2251 scalar=True) 2252 # SQRDMULH (vector) 2253 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", 2254 ("int16_t", "int32_t"), 2, sqrdmulhCode) 2255 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", 2256 ("int16_t", "int32_t"), 4, sqrdmulhCode) 2257 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", 2258 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) 2259 # SQRSHL 2260 sqrshlCode = ''' 2261 int16_t shiftAmt = (int8_t)srcElem2; 2262 FPSCR fpscr = (FPSCR) FpscrQc; 2263 if (shiftAmt < 0) { 2264 shiftAmt = -shiftAmt; 2265 Element rBit = 0; 2266 if (shiftAmt <= sizeof(Element) * 8) 2267 rBit = bits(srcElem1, shiftAmt - 1); 2268 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) 2269 rBit = 1; 
2270 if (shiftAmt >= sizeof(Element) * 8) { 2271 shiftAmt = sizeof(Element) * 8 - 1; 2272 destElem = 0; 2273 } else { 2274 destElem = (srcElem1 >> shiftAmt); 2275 } 2276 // Make sure the right shift sign extended when it should. 2277 if (srcElem1 < 0 && destElem >= 0) { 2278 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2279 1 - shiftAmt)); 2280 } 2281 destElem += rBit; 2282 } else if (shiftAmt > 0) { 2283 bool sat = false; 2284 if (shiftAmt >= sizeof(Element) * 8) { 2285 if (srcElem1 != 0) 2286 sat = true; 2287 else 2288 destElem = 0; 2289 } else { 2290 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2291 sizeof(Element) * 8 - 1 - shiftAmt) != 2292 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2293 sat = true; 2294 } else { 2295 destElem = srcElem1 << shiftAmt; 2296 } 2297 } 2298 if (sat) { 2299 fpscr.qc = 1; 2300 destElem = mask(sizeof(Element) * 8 - 1); 2301 if (srcElem1 < 0) 2302 destElem = ~destElem; 2303 } 2304 } else { 2305 destElem = srcElem1; 2306 } 2307 FpscrQc = fpscr; 2308 ''' 2309 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, 2310 sqrshlCode) 2311 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, 2312 sqrshlCode) 2313 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, 2314 sqrshlCode, scalar=True) 2315 # SQRSHRN, SQRSHRN2 2316 sqrshrnCode = ''' 2317 FPSCR fpscr = (FPSCR) FpscrQc; 2318 if (imm > sizeof(srcElem1) * 8) { 2319 if (srcElem1 != 0 && srcElem1 != -1) 2320 fpscr.qc = 1; 2321 destElem = 0; 2322 } else if (imm) { 2323 BigElement mid = (srcElem1 >> (imm - 1)); 2324 uint64_t rBit = mid & 0x1; 2325 mid >>= 1; 2326 mid |= -(mid & ((BigElement)1 << 2327 (sizeof(BigElement) * 8 - 1 - imm))); 2328 mid += rBit; 2329 if (mid != (Element)mid) { 2330 destElem = mask(sizeof(Element) * 8 - 1); 2331 if (srcElem1 < 0) 2332 destElem = ~destElem; 2333 fpscr.qc = 1; 2334 } else { 2335 destElem = mid; 2336 } 2337 } else { 2338 if (srcElem1 != (Element)srcElem1) { 2339 destElem = 
mask(sizeof(Element) * 8 - 1); 2340 if (srcElem1 < 0) 2341 destElem = ~destElem; 2342 fpscr.qc = 1; 2343 } else { 2344 destElem = srcElem1; 2345 } 2346 } 2347 FpscrQc = fpscr; 2348 ''' 2349 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, 2350 sqrshrnCode, hasImm=True) 2351 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, 2352 sqrshrnCode, hasImm=True, hi=True) 2353 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, 2354 sqrshrnCode, hasImm=True, scalar=True) 2355 # SQRSHRUN, SQRSHRUN2 2356 sqrshrunCode = ''' 2357 FPSCR fpscr = (FPSCR) FpscrQc; 2358 if (imm > sizeof(srcElem1) * 8) { 2359 if (srcElem1 != 0) 2360 fpscr.qc = 1; 2361 destElem = 0; 2362 } else if (imm) { 2363 BigElement mid = (srcElem1 >> (imm - 1)); 2364 uint64_t rBit = mid & 0x1; 2365 mid >>= 1; 2366 mid |= -(mid & ((BigElement)1 << 2367 (sizeof(BigElement) * 8 - 1 - imm))); 2368 mid += rBit; 2369 if (bits(mid, sizeof(BigElement) * 8 - 1, 2370 sizeof(Element) * 8) != 0) { 2371 if (srcElem1 < 0) { 2372 destElem = 0; 2373 } else { 2374 destElem = mask(sizeof(Element) * 8); 2375 } 2376 fpscr.qc = 1; 2377 } else { 2378 destElem = mid; 2379 } 2380 } else { 2381 if (srcElem1 < 0) { 2382 fpscr.qc = 1; 2383 destElem = 0; 2384 } else { 2385 destElem = srcElem1; 2386 } 2387 } 2388 FpscrQc = fpscr; 2389 ''' 2390 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, 2391 sqrshrunCode, hasImm=True) 2392 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", 2393 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) 2394 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", 2395 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) 2396 # SQSHL (immediate) 2397 sqshlImmCode = ''' 2398 FPSCR fpscr = (FPSCR) FpscrQc; 2399 if (imm >= sizeof(Element) * 8) { 2400 if (srcElem1 != 0) { 2401 destElem = std::numeric_limits<Element>::min(); 2402 if (srcElem1 > 0) 2403 destElem = ~destElem; 2404 fpscr.qc = 1; 2405 
} else { 2406 destElem = 0; 2407 } 2408 } else if (imm) { 2409 destElem = (srcElem1 << imm); 2410 uint64_t topBits = bits((uint64_t)srcElem1, 2411 sizeof(Element) * 8 - 1, 2412 sizeof(Element) * 8 - 1 - imm); 2413 if (topBits != 0 && topBits != mask(imm + 1)) { 2414 destElem = std::numeric_limits<Element>::min(); 2415 if (srcElem1 > 0) 2416 destElem = ~destElem; 2417 fpscr.qc = 1; 2418 } 2419 } else { 2420 destElem = srcElem1; 2421 } 2422 FpscrQc = fpscr; 2423 ''' 2424 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, 2425 sqshlImmCode, hasImm=True) 2426 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, 2427 sqshlImmCode, hasImm=True) 2428 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, 2429 sqshlImmCode, hasImm=True, scalar=True) 2430 # SQSHL (register) 2431 sqshlCode = ''' 2432 int16_t shiftAmt = (int8_t)srcElem2; 2433 FPSCR fpscr = (FPSCR) FpscrQc; 2434 if (shiftAmt < 0) { 2435 shiftAmt = -shiftAmt; 2436 if (shiftAmt >= sizeof(Element) * 8) { 2437 shiftAmt = sizeof(Element) * 8 - 1; 2438 destElem = 0; 2439 } else { 2440 destElem = (srcElem1 >> shiftAmt); 2441 } 2442 // Make sure the right shift sign extended when it should. 2443 if (srcElem1 < 0 && destElem >= 0) { 2444 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2445 1 - shiftAmt)); 2446 } 2447 } else if (shiftAmt > 0) { 2448 bool sat = false; 2449 if (shiftAmt >= sizeof(Element) * 8) { 2450 if (srcElem1 != 0) 2451 sat = true; 2452 else 2453 destElem = 0; 2454 } else { 2455 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2456 sizeof(Element) * 8 - 1 - shiftAmt) != 2457 ((srcElem1 < 0) ? 
mask(shiftAmt + 1) : 0)) { 2458 sat = true; 2459 } else { 2460 destElem = srcElem1 << shiftAmt; 2461 } 2462 } 2463 if (sat) { 2464 fpscr.qc = 1; 2465 destElem = mask(sizeof(Element) * 8 - 1); 2466 if (srcElem1 < 0) 2467 destElem = ~destElem; 2468 } 2469 } else { 2470 destElem = srcElem1; 2471 } 2472 FpscrQc = fpscr; 2473 ''' 2474 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, 2475 sqshlCode) 2476 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, 2477 sqshlCode) 2478 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, 2479 sqshlCode, scalar=True) 2480 # SQSHLU 2481 sqshluCode = ''' 2482 FPSCR fpscr = (FPSCR) FpscrQc; 2483 if (imm >= sizeof(Element) * 8) { 2484 if (srcElem1 < 0) { 2485 destElem = 0; 2486 fpscr.qc = 1; 2487 } else if (srcElem1 > 0) { 2488 destElem = mask(sizeof(Element) * 8); 2489 fpscr.qc = 1; 2490 } else { 2491 destElem = 0; 2492 } 2493 } else if (imm) { 2494 destElem = (srcElem1 << imm); 2495 uint64_t topBits = bits((uint64_t)srcElem1, 2496 sizeof(Element) * 8 - 1, 2497 sizeof(Element) * 8 - imm); 2498 if (srcElem1 < 0) { 2499 destElem = 0; 2500 fpscr.qc = 1; 2501 } else if (topBits != 0) { 2502 destElem = mask(sizeof(Element) * 8); 2503 fpscr.qc = 1; 2504 } 2505 } else { 2506 if (srcElem1 < 0) { 2507 fpscr.qc = 1; 2508 destElem = 0; 2509 } else { 2510 destElem = srcElem1; 2511 } 2512 } 2513 FpscrQc = fpscr; 2514 ''' 2515 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, 2516 sqshluCode, hasImm=True) 2517 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, 2518 sqshluCode, hasImm=True) 2519 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, 2520 sqshluCode, hasImm=True, scalar=True) 2521 # SQSHRN, SQSHRN2 2522 sqshrnCode = ''' 2523 FPSCR fpscr = (FPSCR) FpscrQc; 2524 if (imm > sizeof(srcElem1) * 8) { 2525 if (srcElem1 != 0 && srcElem1 != -1) 2526 fpscr.qc = 1; 2527 destElem = 0; 2528 } else if (imm) { 2529 BigElement mid = 
((srcElem1 >> (imm - 1)) >> 1); 2530 mid |= -(mid & ((BigElement)1 << 2531 (sizeof(BigElement) * 8 - 1 - imm))); 2532 if (mid != (Element)mid) { 2533 destElem = mask(sizeof(Element) * 8 - 1); 2534 if (srcElem1 < 0) 2535 destElem = ~destElem; 2536 fpscr.qc = 1; 2537 } else { 2538 destElem = mid; 2539 } 2540 } else { 2541 destElem = srcElem1; 2542 } 2543 FpscrQc = fpscr; 2544 ''' 2545 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, 2546 sqshrnCode, hasImm=True) 2547 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, 2548 sqshrnCode, hasImm=True, hi=True) 2549 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, 2550 sqshrnCode, hasImm=True, scalar=True) 2551 # SQSHRUN, SQSHRUN2 2552 sqshrunCode = ''' 2553 FPSCR fpscr = (FPSCR) FpscrQc; 2554 if (imm > sizeof(srcElem1) * 8) { 2555 if (srcElem1 != 0) 2556 fpscr.qc = 1; 2557 destElem = 0; 2558 } else if (imm) { 2559 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2560 if (bits(mid, sizeof(BigElement) * 8 - 1, 2561 sizeof(Element) * 8) != 0) { 2562 if (srcElem1 < 0) { 2563 destElem = 0; 2564 } else { 2565 destElem = mask(sizeof(Element) * 8); 2566 } 2567 fpscr.qc = 1; 2568 } else { 2569 destElem = mid; 2570 } 2571 } else { 2572 destElem = srcElem1; 2573 } 2574 FpscrQc = fpscr; 2575 ''' 2576 twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, 2577 sqshrunCode, hasImm=True) 2578 twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, 2579 sqshrunCode, hasImm=True, hi=True) 2580 twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, 2581 sqshrunCode, hasImm=True, scalar=True) 2582 # SQSUB 2583 sqsubCode = ''' 2584 destElem = srcElem1 - srcElem2; 2585 FPSCR fpscr = (FPSCR) FpscrQc; 2586 bool negDest = (destElem < 0); 2587 bool negSrc1 = (srcElem1 < 0); 2588 bool posSrc2 = (srcElem2 >= 0); 2589 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { 2590 destElem = std::numeric_limits<Element>::min(); 
2591 if (negDest) 2592 destElem -= 1; 2593 fpscr.qc = 1; 2594 } 2595 FpscrQc = fpscr; 2596 ''' 2597 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, 2598 sqsubCode) 2599 threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, 2600 sqsubCode) 2601 threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, 2602 sqsubCode, scalar=True) 2603 # SQXTN, SQXTN2 2604 sqxtnCode = ''' 2605 FPSCR fpscr = (FPSCR) FpscrQc; 2606 destElem = srcElem1; 2607 if ((BigElement)destElem != srcElem1) { 2608 fpscr.qc = 1; 2609 destElem = mask(sizeof(Element) * 8 - 1); 2610 if (srcElem1 < 0) 2611 destElem = ~destElem; 2612 } 2613 FpscrQc = fpscr; 2614 ''' 2615 twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, 2616 sqxtnCode) 2617 twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, 2618 sqxtnCode, hi=True) 2619 twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, 2620 sqxtnCode, scalar=True) 2621 # SQXTUN, SQXTUN2 2622 sqxtunCode = ''' 2623 FPSCR fpscr = (FPSCR) FpscrQc; 2624 destElem = srcElem1; 2625 if (srcElem1 < 0 || 2626 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 2627 fpscr.qc = 1; 2628 destElem = mask(sizeof(Element) * 8); 2629 if (srcElem1 < 0) 2630 destElem = ~destElem; 2631 } 2632 FpscrQc = fpscr; 2633 ''' 2634 twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, 2635 sqxtunCode) 2636 twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, 2637 sqxtunCode, hi=True) 2638 twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, 2639 sqxtunCode, scalar=True) 2640 # SRHADD 2641 rhaddCode = ''' 2642 Element carryBit = 2643 (((unsigned)srcElem1 & 0x1) + 2644 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 2645 // Use division instead of a shift to ensure the sign extension works 2646 // right. The compiler will figure out if it can be a shift. Mask the 2647 // inputs so they get truncated correctly. 
2648 destElem = (((srcElem1 & ~(Element)1) / 2) + 2649 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2650 ''' 2651 threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, 2652 rhaddCode) 2653 threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, 2654 rhaddCode) 2655 # SRI 2656 sriCode = ''' 2657 if (imm >= sizeof(Element) * 8) 2658 destElem = destElem; 2659 else 2660 destElem = (srcElem1 >> imm) | 2661 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2662 ''' 2663 twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, 2664 True, hasImm=True) 2665 twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, 2666 True, hasImm=True) 2667 # SRSHL 2668 rshlCode = ''' 2669 int16_t shiftAmt = (int8_t)srcElem2; 2670 if (shiftAmt < 0) { 2671 shiftAmt = -shiftAmt; 2672 Element rBit = 0; 2673 if (shiftAmt <= sizeof(Element) * 8) 2674 rBit = bits(srcElem1, shiftAmt - 1); 2675 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) 2676 rBit = 1; 2677 if (shiftAmt >= sizeof(Element) * 8) { 2678 shiftAmt = sizeof(Element) * 8 - 1; 2679 destElem = 0; 2680 } else { 2681 destElem = (srcElem1 >> shiftAmt); 2682 } 2683 // Make sure the right shift sign extended when it should. 
2684 if (ltz(srcElem1) && !ltz(destElem)) { 2685 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2686 1 - shiftAmt)); 2687 } 2688 destElem += rBit; 2689 } else if (shiftAmt > 0) { 2690 if (shiftAmt >= sizeof(Element) * 8) { 2691 destElem = 0; 2692 } else { 2693 destElem = srcElem1 << shiftAmt; 2694 } 2695 } else { 2696 destElem = srcElem1; 2697 } 2698 ''' 2699 threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, 2700 rshlCode) 2701 threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, 2702 rshlCode) 2703 # SRSHR 2704 rshrCode = ''' 2705 if (imm > sizeof(srcElem1) * 8) { 2706 destElem = 0; 2707 } else if (imm) { 2708 Element rBit = bits(srcElem1, imm - 1); 2709 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2710 } else { 2711 destElem = srcElem1; 2712 } 2713 ''' 2714 twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, 2715 rshrCode, hasImm=True) 2716 twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, 2717 rshrCode, hasImm=True) 2718 # SRSRA 2719 rsraCode = ''' 2720 if (imm > sizeof(srcElem1) * 8) { 2721 destElem += 0; 2722 } else if (imm) { 2723 Element rBit = bits(srcElem1, imm - 1); 2724 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2725 } else { 2726 destElem += srcElem1; 2727 } 2728 ''' 2729 twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, 2730 rsraCode, True, hasImm=True) 2731 twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, 2732 rsraCode, True, hasImm=True) 2733 # SSHL 2734 shlCode = ''' 2735 int16_t shiftAmt = (int8_t)srcElem2; 2736 if (shiftAmt < 0) { 2737 shiftAmt = -shiftAmt; 2738 if (shiftAmt >= sizeof(Element) * 8) { 2739 shiftAmt = sizeof(Element) * 8 - 1; 2740 destElem = 0; 2741 } else { 2742 destElem = (srcElem1 >> shiftAmt); 2743 } 2744 // Make sure the right shift sign extended when it should. 
2745 if (ltz(srcElem1) && !ltz(destElem)) { 2746 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2747 1 - shiftAmt)); 2748 } 2749 } else { 2750 if (shiftAmt >= sizeof(Element) * 8) { 2751 destElem = 0; 2752 } else { 2753 destElem = srcElem1 << shiftAmt; 2754 } 2755 } 2756 ''' 2757 threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, 2758 shlCode) 2759 threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, 2760 shlCode) 2761 # SSHLL, SSHLL2 2762 shllCode = ''' 2763 if (imm >= sizeof(destElem) * 8) { 2764 destElem = 0; 2765 } else { 2766 destElem = (BigElement)srcElem1 << imm; 2767 } 2768 ''' 2769 twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, 2770 shllCode, hasImm=True) 2771 twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, 2772 shllCode, hasImm=True, hi=True) 2773 # SSHR 2774 shrCode = ''' 2775 if (imm >= sizeof(srcElem1) * 8) { 2776 if (ltz(srcElem1)) 2777 destElem = -1; 2778 else 2779 destElem = 0; 2780 } else { 2781 destElem = srcElem1 >> imm; 2782 } 2783 ''' 2784 twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, 2785 hasImm=True) 2786 twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, 2787 hasImm=True) 2788 # SSRA 2789 sraCode = ''' 2790 Element mid;; 2791 if (imm >= sizeof(srcElem1) * 8) { 2792 mid = ltz(srcElem1) ? 
-1 : 0; 2793 } else { 2794 mid = srcElem1 >> imm; 2795 if (ltz(srcElem1) && !ltz(mid)) { 2796 mid |= -(mid & ((Element)1 << 2797 (sizeof(Element) * 8 - 1 - imm))); 2798 } 2799 } 2800 destElem += mid; 2801 ''' 2802 twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, 2803 True, hasImm=True) 2804 twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, 2805 True, hasImm=True) 2806 # SSUBL 2807 sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" 2808 threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, 2809 sublwCode) 2810 threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, 2811 sublwCode, hi=True) 2812 # SSUBW 2813 threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, 2814 sublwCode) 2815 threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, 2816 sublwCode, hi=True) 2817 # SUB 2818 subCode = "destElem = srcElem1 - srcElem2;" 2819 threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) 2820 threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) 2821 # SUBHN, SUBHN2 2822 subhnCode = ''' 2823 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> 2824 (sizeof(Element) * 8); 2825 ''' 2826 threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, 2827 subhnCode) 2828 threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, 2829 subhnCode, hi=True) 2830 # SUQADD 2831 suqaddCode = ''' 2832 FPSCR fpscr = (FPSCR) FpscrQc; 2833 Element tmp = destElem + srcElem1; 2834 if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { 2835 if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || 2836 tmp < srcElem1 || tmp < destElem) { 2837 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2838 fpscr.qc = 1; 2839 } else { 2840 destElem = tmp; 2841 } 2842 } else { 2843 Element absDestElem = (~destElem) + 1; 2844 if (absDestElem < srcElem1) { 2845 // Still check for positive sat., no need to check for 
negative sat. 2846 if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { 2847 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2848 fpscr.qc = 1; 2849 } else { 2850 destElem = tmp; 2851 } 2852 } else { 2853 destElem = tmp; 2854 } 2855 } 2856 FpscrQc = fpscr; 2857 ''' 2858 twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2859 suqaddCode, True) 2860 twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, 2861 suqaddCode, True) 2862 twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, 2863 suqaddCode, True, scalar=True) 2864 # SXTL -> alias to SSHLL 2865 # TBL 2866 tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) 2867 tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) 2868 tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) 2869 tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) 2870 tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) 2871 tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) 2872 tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) 2873 tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) 2874 # TBX 2875 tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) 2876 tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) 2877 tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) 2878 tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) 2879 tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) 2880 tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) 2881 tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) 2882 tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) 2883 # TRN1 2884 trnCode = ''' 2885 unsigned part = %s; 2886 for (unsigned i = 0; i < eCount / 2; i++) { 2887 destReg.elements[2 * i] = 
srcReg1.elements[2 * i + part]; 2888 destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; 2889 } 2890 ''' 2891 threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, 2892 trnCode % "0") 2893 threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, 2894 trnCode % "0") 2895 # TRN2 2896 threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, 2897 trnCode % "1") 2898 threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, 2899 trnCode % "1") 2900 # UABA 2901 threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, 2902 abaCode, True) 2903 threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, 2904 abaCode, True) 2905 # UABAL, UABAL2 2906 threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, 2907 abalCode, True) 2908 threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, 2909 abalCode, True, hi=True) 2910 # UABD 2911 threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, 2912 abdCode) 2913 threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, 2914 abdCode) 2915 # UABDL, UABDL2 2916 threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, 2917 abdlCode, True) 2918 threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, 2919 abdlCode, True, hi=True) 2920 # UADALP 2921 twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, 2922 2, adalpCode, True) 2923 twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, 2924 4, adalpCode, True) 2925 # UADDL, UADDL2 2926 threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, 2927 addlwCode) 2928 threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, 2929 addlwCode, hi=True) 2930 # UADDLP 2931 twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, 2932 2, addlwCode) 2933 twoRegCondenseInstX("uaddlp", "UaddlpQX", 
"SimdAddOp", smallUnsignedTypes, 2934 4, addlwCode) 2935 # UADDLV 2936 twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", 2937 ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) 2938 twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", 2939 ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True) 2940 twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, 2941 addAcrossLongCode, doubleDest=True, long=True) 2942 # UADDW 2943 threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, 2944 addlwCode) 2945 threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, 2946 addlwCode, hi=True) 2947 # UCVTF (fixed-point) 2948 ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," 2949 " FPCRRounding(fpscr), fpscr)") 2950 twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, 2951 ucvtfFixedCode, hasImm=True) 2952 twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, 2953 ucvtfFixedCode, hasImm=True) 2954 twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, 2955 ucvtfFixedCode, hasImm=True, scalar=True) 2956 # UCVTF (integer) 2957 ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," 2958 " FPCRRounding(fpscr), fpscr)") 2959 twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, 2960 ucvtfIntCode) 2961 twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, 2962 ucvtfIntCode) 2963 twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, 2964 ucvtfIntCode, scalar=True) 2965 # UHADD 2966 threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2967 haddCode) 2968 threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, 2969 haddCode) 2970 # UHSUB 2971 threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, 2972 hsubCode) 2973 threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, 2974 hsubCode) 2975 # UMAX 2976 
threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, 2977 maxCode) 2978 threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, 2979 maxCode) 2980 # UMAXP 2981 threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2982 maxCode, pairwise=True) 2983 threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2984 maxCode, pairwise=True) 2985 # UMAXV 2986 twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2987 2, maxAcrossCode) 2988 twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2989 maxAcrossCode) 2990 # UMIN 2991 threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, 2992 minCode) 2993 threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, 2994 minCode) 2995 # UMINP 2996 threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2997 minCode, pairwise=True) 2998 threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2999 minCode, pairwise=True) 3000 # UMINV 3001 twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 3002 2, minAcrossCode) 3003 twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, 3004 minAcrossCode) 3005 # UMLAL (by element) 3006 threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", 3007 smallUnsignedTypes, mlalCode, True, byElem=True) 3008 threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", 3009 smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) 3010 # UMLAL (vector) 3011 threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, 3012 mlalCode, True) 3013 threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, 3014 mlalCode, True, hi=True) 3015 # UMLSL (by element) 3016 threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", 3017 smallUnsignedTypes, mlslCode, True, byElem=True) 3018 threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", 3019 
smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) 3020 # UMLSL (vector) 3021 threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, 3022 mlslCode, True) 3023 threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, 3024 mlslCode, True, hi=True) 3025 # UMOV 3026 insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 3027 insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') 3028 # UMULL, UMULL2 (by element) 3029 threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, 3030 mullCode, byElem=True) 3031 threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, 3032 mullCode, byElem=True, hi=True) 3033 # UMULL, UMULL2 (vector) 3034 threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, 3035 mullCode) 3036 threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, 3037 mullCode, hi=True) 3038 # UQADD 3039 uqaddCode = ''' 3040 destElem = srcElem1 + srcElem2; 3041 FPSCR fpscr = (FPSCR) FpscrQc; 3042 if (destElem < srcElem1 || destElem < srcElem2) { 3043 destElem = (Element)(-1); 3044 fpscr.qc = 1; 3045 } 3046 FpscrQc = fpscr; 3047 ''' 3048 threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3049 uqaddCode) 3050 threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, 3051 uqaddCode) 3052 threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, 3053 uqaddCode, scalar=True) 3054 # UQRSHL 3055 uqrshlCode = ''' 3056 int16_t shiftAmt = (int8_t)srcElem2; 3057 FPSCR fpscr = (FPSCR) FpscrQc; 3058 if (shiftAmt < 0) { 3059 shiftAmt = -shiftAmt; 3060 Element rBit = 0; 3061 if (shiftAmt <= sizeof(Element) * 8) 3062 rBit = bits(srcElem1, shiftAmt - 1); 3063 if (shiftAmt >= sizeof(Element) * 8) { 3064 shiftAmt = sizeof(Element) * 8 - 1; 3065 destElem = 0; 3066 } else { 3067 destElem = (srcElem1 >> shiftAmt); 3068 } 3069 destElem += rBit; 3070 } else { 3071 if (shiftAmt >= sizeof(Element) * 8) { 
3072 if (srcElem1 != 0) { 3073 destElem = mask(sizeof(Element) * 8); 3074 fpscr.qc = 1; 3075 } else { 3076 destElem = 0; 3077 } 3078 } else { 3079 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3080 sizeof(Element) * 8 - shiftAmt)) { 3081 destElem = mask(sizeof(Element) * 8); 3082 fpscr.qc = 1; 3083 } else { 3084 destElem = srcElem1 << shiftAmt; 3085 } 3086 } 3087 } 3088 FpscrQc = fpscr; 3089 ''' 3090 threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, 3091 2, uqrshlCode) 3092 threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, 3093 uqrshlCode) 3094 threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, 3095 uqrshlCode, scalar=True) 3096 # UQRSHRN 3097 uqrshrnCode = ''' 3098 FPSCR fpscr = (FPSCR) FpscrQc; 3099 if (imm > sizeof(srcElem1) * 8) { 3100 if (srcElem1 != 0) 3101 fpscr.qc = 1; 3102 destElem = 0; 3103 } else if (imm) { 3104 BigElement mid = (srcElem1 >> (imm - 1)); 3105 uint64_t rBit = mid & 0x1; 3106 mid >>= 1; 3107 mid += rBit; 3108 if (mid != (Element)mid) { 3109 destElem = mask(sizeof(Element) * 8); 3110 fpscr.qc = 1; 3111 } else { 3112 destElem = mid; 3113 } 3114 } else { 3115 if (srcElem1 != (Element)srcElem1) { 3116 destElem = mask(sizeof(Element) * 8 - 1); 3117 fpscr.qc = 1; 3118 } else { 3119 destElem = srcElem1; 3120 } 3121 } 3122 FpscrQc = fpscr; 3123 ''' 3124 twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, 3125 uqrshrnCode, hasImm=True) 3126 twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", 3127 smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) 3128 twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", 3129 smallUnsignedTypes, uqrshrnCode, hasImm=True, 3130 scalar=True) 3131 # UQSHL (immediate) 3132 uqshlImmCode = ''' 3133 FPSCR fpscr = (FPSCR) FpscrQc; 3134 if (imm >= sizeof(Element) * 8) { 3135 if (srcElem1 != 0) { 3136 destElem = mask(sizeof(Element) * 8); 3137 fpscr.qc = 1; 3138 } else { 3139 destElem = 0; 3140 } 3141 } else if (imm) { 3142 
destElem = (srcElem1 << imm); 3143 uint64_t topBits = bits((uint64_t)srcElem1, 3144 sizeof(Element) * 8 - 1, 3145 sizeof(Element) * 8 - imm); 3146 if (topBits != 0) { 3147 destElem = mask(sizeof(Element) * 8); 3148 fpscr.qc = 1; 3149 } 3150 } else { 3151 destElem = srcElem1; 3152 } 3153 FpscrQc = fpscr; 3154 ''' 3155 twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, 3156 uqshlImmCode, hasImm=True) 3157 twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, 3158 uqshlImmCode, hasImm=True) 3159 twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, 3160 uqshlImmCode, hasImm=True, scalar=True) 3161 # UQSHL (register) 3162 uqshlCode = ''' 3163 int16_t shiftAmt = (int8_t)srcElem2; 3164 FPSCR fpscr = (FPSCR) FpscrQc; 3165 if (shiftAmt < 0) { 3166 shiftAmt = -shiftAmt; 3167 if (shiftAmt >= sizeof(Element) * 8) { 3168 shiftAmt = sizeof(Element) * 8 - 1; 3169 destElem = 0; 3170 } else { 3171 destElem = (srcElem1 >> shiftAmt); 3172 } 3173 } else if (shiftAmt > 0) { 3174 if (shiftAmt >= sizeof(Element) * 8) { 3175 if (srcElem1 != 0) { 3176 destElem = mask(sizeof(Element) * 8); 3177 fpscr.qc = 1; 3178 } else { 3179 destElem = 0; 3180 } 3181 } else { 3182 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3183 sizeof(Element) * 8 - shiftAmt)) { 3184 destElem = mask(sizeof(Element) * 8); 3185 fpscr.qc = 1; 3186 } else { 3187 destElem = srcElem1 << shiftAmt; 3188 } 3189 } 3190 } else { 3191 destElem = srcElem1; 3192 } 3193 FpscrQc = fpscr; 3194 ''' 3195 threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, 3196 uqshlCode) 3197 threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, 3198 uqshlCode) 3199 threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, 3200 uqshlCode, scalar=True) 3201 # UQSHRN, UQSHRN2 3202 uqshrnCode = ''' 3203 FPSCR fpscr = (FPSCR) FpscrQc; 3204 if (imm > sizeof(srcElem1) * 8) { 3205 if (srcElem1 != 0) 3206 fpscr.qc = 1; 3207 destElem = 0; 3208 } else if 
(imm) { 3209 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3210 if (mid != (Element)mid) { 3211 destElem = mask(sizeof(Element) * 8); 3212 fpscr.qc = 1; 3213 } else { 3214 destElem = mid; 3215 } 3216 } else { 3217 destElem = srcElem1; 3218 } 3219 FpscrQc = fpscr; 3220 ''' 3221 twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, 3222 uqshrnCode, hasImm=True) 3223 twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, 3224 uqshrnCode, hasImm=True, hi=True) 3225 twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, 3226 uqshrnCode, hasImm=True, scalar=True) 3227 # UQSUB 3228 uqsubCode = ''' 3229 destElem = srcElem1 - srcElem2; 3230 FPSCR fpscr = (FPSCR) FpscrQc; 3231 if (destElem > srcElem1) { 3232 destElem = 0; 3233 fpscr.qc = 1; 3234 } 3235 FpscrQc = fpscr; 3236 ''' 3237 threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, 3238 uqsubCode) 3239 threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, 3240 uqsubCode) 3241 threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, 3242 uqsubCode, scalar=True) 3243 # UQXTN 3244 uqxtnCode = ''' 3245 FPSCR fpscr = (FPSCR) FpscrQc; 3246 destElem = srcElem1; 3247 if ((BigElement)destElem != srcElem1) { 3248 fpscr.qc = 1; 3249 destElem = mask(sizeof(Element) * 8); 3250 } 3251 FpscrQc = fpscr; 3252 ''' 3253 twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, 3254 uqxtnCode) 3255 twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, 3256 uqxtnCode, hi=True) 3257 twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, 3258 uqxtnCode, scalar=True) 3259 # URECPE 3260 urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" 3261 twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, 3262 urecpeCode) 3263 twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, 3264 urecpeCode) 3265 # URHADD 3266 threeEqualRegInstX("urhadd", 
"UrhaddDX", "SimdAddOp", smallUnsignedTypes, 3267 2, rhaddCode) 3268 threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, 3269 4, rhaddCode) 3270 # URSHL 3271 threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, 3272 rshlCode) 3273 threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, 3274 rshlCode) 3275 # URSHR 3276 twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, 3277 rshrCode, hasImm=True) 3278 twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, 3279 rshrCode, hasImm=True) 3280 # URSQRTE 3281 ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" 3282 twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, 3283 ursqrteCode) 3284 twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, 3285 ursqrteCode) 3286 # URSRA 3287 twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, 3288 rsraCode, True, hasImm=True) 3289 twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, 3290 rsraCode, True, hasImm=True) 3291 # USHL 3292 threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, 3293 shlCode) 3294 threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, 3295 shlCode) 3296 # USHLL, USHLL2 3297 twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, 3298 shllCode, hasImm=True) 3299 twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, 3300 shllCode, hi=True, hasImm=True) 3301 # USHR 3302 twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, 3303 shrCode, hasImm=True) 3304 twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, 3305 shrCode, hasImm=True) 3306 # USQADD 3307 usqaddCode = ''' 3308 FPSCR fpscr = (FPSCR) FpscrQc; 3309 Element tmp = destElem + srcElem1; 3310 if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { 3311 if (tmp < srcElem1 || tmp < destElem) { 3312 destElem = (Element)(-1); 3313 fpscr.qc = 1; 3314 } else { 
3315 destElem = tmp; 3316 } 3317 } else { 3318 Element absSrcElem1 = (~srcElem1) + 1; 3319 if (absSrcElem1 > destElem) { 3320 destElem = 0; 3321 fpscr.qc = 1; 3322 } else { 3323 destElem = tmp; 3324 } 3325 } 3326 FpscrQc = fpscr; 3327 ''' 3328 twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3329 usqaddCode, True) 3330 twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, 3331 usqaddCode, True) 3332 twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, 3333 usqaddCode, True, scalar=True) 3334 # USRA 3335 twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, 3336 sraCode, True, hasImm=True) 3337 twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, 3338 sraCode, True, hasImm=True) 3339 # USUBL 3340 threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, 3341 sublwCode) 3342 threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, 3343 sublwCode, hi=True) 3344 # USUBW 3345 threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, 3346 sublwCode) 3347 threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, 3348 sublwCode, hi=True) 3349 # UXTL -> alias to USHLL 3350 # UZP1 3351 uzpCode = ''' 3352 unsigned part = %s; 3353 for (unsigned i = 0; i < eCount / 2; i++) { 3354 destReg.elements[i] = srcReg1.elements[2 * i + part]; 3355 destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; 3356 } 3357 ''' 3358 threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, 3359 uzpCode % "0") 3360 threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, 3361 uzpCode % "0") 3362 # UZP2 3363 threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, 3364 uzpCode % "1") 3365 threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, 3366 uzpCode % "1") 3367 # XTN, XTN2 3368 xtnCode = "destElem = srcElem1;" 3369 twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", 
smallUnsignedTypes, xtnCode) 3370 twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, 3371 xtnCode, hi=True) 3372 # ZIP1 3373 zipCode = ''' 3374 unsigned base = %s; 3375 for (unsigned i = 0; i < eCount / 2; i++) { 3376 destReg.elements[2 * i] = srcReg1.elements[base + i]; 3377 destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; 3378 } 3379 ''' 3380 threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, 3381 zipCode % "0") 3382 threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, 3383 zipCode % "0") 3384 # ZIP2 3385 threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, 3386 zipCode % "eCount / 2") 3387 threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, 3388 zipCode % "eCount / 2") 3389 3390 for decoderFlavour, type_dict in decoders.iteritems(): 3391 header_output += ''' 3392 class %(decoder_flavour)sDecoder { 3393 public: 3394 ''' % { "decoder_flavour" : decoderFlavour } 3395 for type,name in type_dict.iteritems(): 3396 header_output += ''' 3397 template<typename Elem> using %(type)s = %(new_name)s<Elem>;''' % { 3398 "type" : type, "new_name" : name 3399 } 3400 header_output += ''' 3401 };''' 3402}}; 3403