neon64.isa revision 11165:d90aec9435bd
1// -*- mode: c++ -*- 2 3// Copyright (c) 2012-2013, 2015 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating 9// to a hardware implementation of the functionality of the software 10// licensed hereunder. You may use the software subject to the license 11// terms below provided that you ensure that this notice is replicated 12// unmodified and in its entirety in all distributions of the software, 13// modified or unmodified, in source code or in binary form. 14// 15// Redistribution and use in source and binary forms, with or without 16// modification, are permitted provided that the following conditions are 17// met: redistributions of source code must retain the above copyright 18// notice, this list of conditions and the following disclaimer; 19// redistributions in binary form must reproduce the above copyright 20// notice, this list of conditions and the following disclaimer in the 21// documentation and/or other materials provided with the distribution; 22// neither the name of the copyright holders nor the names of its 23// contributors may be used to endorse or promote products derived from 24// this software without specific prior written permission. 25// 26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37// 38// Authors: Giacomo Gabrielli 39// Mbou Eyole 40 41let {{ 42 43 header_output = "" 44 exec_output = "" 45 decoders = { 'Generic' : {} } 46 47 # FP types (FP operations always work with unsigned representations) 48 floatTypes = ("uint32_t", "uint64_t") 49 smallFloatTypes = ("uint32_t",) 50 51 def threeEqualRegInstX(name, Name, opClass, types, rCount, op, 52 readDest=False, pairwise=False, scalar=False, 53 byElem=False, decoder='Generic'): 54 assert (not pairwise) or ((not byElem) and (not scalar)) 55 global header_output, exec_output, decoders 56 eWalkCode = simd64EnabledCheckCode + ''' 57 RegVect srcReg1, destReg; 58 ''' 59 if byElem: 60 # 2nd register operand has to be read fully 61 eWalkCode += ''' 62 FullRegVect srcReg2; 63 ''' 64 else: 65 eWalkCode += ''' 66 RegVect srcReg2; 67 ''' 68 for reg in range(rCount): 69 eWalkCode += ''' 70 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 71 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 72 ''' % { "reg" : reg } 73 if readDest: 74 eWalkCode += ''' 75 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 76 ''' % { "reg" : reg } 77 if byElem: 78 # 2nd operand has to be read fully 79 for reg in range(rCount, 4): 80 eWalkCode += ''' 81 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 82 ''' % { "reg" : reg } 83 readDestCode = '' 84 if readDest: 85 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 86 if pairwise: 87 eWalkCode += ''' 88 for (unsigned i = 0; i < 
eCount; i++) { 89 Element srcElem1 = gtoh(2 * i < eCount ? 90 srcReg1.elements[2 * i] : 91 srcReg2.elements[2 * i - eCount]); 92 Element srcElem2 = gtoh(2 * i < eCount ? 93 srcReg1.elements[2 * i + 1] : 94 srcReg2.elements[2 * i + 1 - eCount]); 95 Element destElem; 96 %(readDest)s 97 %(op)s 98 destReg.elements[i] = htog(destElem); 99 } 100 ''' % { "op" : op, "readDest" : readDestCode } 101 else: 102 scalarCheck = ''' 103 if (i != 0) { 104 destReg.elements[i] = 0; 105 continue; 106 } 107 ''' 108 eWalkCode += ''' 109 for (unsigned i = 0; i < eCount; i++) { 110 %(scalarCheck)s 111 Element srcElem1 = gtoh(srcReg1.elements[i]); 112 Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 113 Element destElem; 114 %(readDest)s 115 %(op)s 116 destReg.elements[i] = htog(destElem); 117 } 118 ''' % { "op" : op, "readDest" : readDestCode, 119 "scalarCheck" : scalarCheck if scalar else "", 120 "src2Index" : "imm" if byElem else "i" } 121 for reg in range(rCount): 122 eWalkCode += ''' 123 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 124 ''' % { "reg" : reg } 125 if rCount < 4: # zero upper half 126 for reg in range(rCount, 4): 127 eWalkCode += ''' 128 AA64FpDestP%(reg)d_uw = 0; 129 ''' % { "reg" : reg } 130 iop = InstObjParams(name, Name, 131 "DataX2RegImmOp" if byElem else "DataX2RegOp", 132 { "code": eWalkCode, 133 "r_count": rCount, 134 "op_class": opClass }, []) 135 if byElem: 136 header_output += NeonX2RegImmOpDeclare.subst(iop) 137 else: 138 header_output += NeonX2RegOpDeclare.subst(iop) 139 exec_output += NeonXEqualRegOpExecute.subst(iop) 140 for type in types: 141 substDict = { "targs" : type, 142 "class_name" : Name } 143 exec_output += NeonXExecDeclare.subst(substDict) 144 145 def threeUnequalRegInstX(name, Name, opClass, types, op, 146 bigSrc1, bigSrc2, bigDest, readDest, scalar=False, 147 byElem=False, hi=False): 148 assert not (scalar and hi) 149 global header_output, exec_output 150 src1Cnt = src2Cnt = destCnt = 2 151 src1Prefix = src2Prefix = destPrefix 
= '' 152 if bigSrc1: 153 src1Cnt = 4 154 src1Prefix = 'Big' 155 if bigSrc2: 156 src2Cnt = 4 157 src2Prefix = 'Big' 158 if bigDest: 159 destCnt = 4 160 destPrefix = 'Big' 161 if byElem: 162 src2Prefix = 'Full' 163 eWalkCode = simd64EnabledCheckCode + ''' 164 %sRegVect srcReg1; 165 %sRegVect srcReg2; 166 %sRegVect destReg; 167 ''' % (src1Prefix, src2Prefix, destPrefix) 168 srcReg1 = 0 169 if hi and not bigSrc1: # long/widening operations 170 srcReg1 = 2 171 for reg in range(src1Cnt): 172 eWalkCode += ''' 173 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); 174 ''' % { "reg" : reg, "srcReg1" : srcReg1 } 175 srcReg1 += 1 176 srcReg2 = 0 177 if (not byElem) and (hi and not bigSrc2): # long/widening operations 178 srcReg2 = 2 179 for reg in range(src2Cnt): 180 eWalkCode += ''' 181 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); 182 ''' % { "reg" : reg, "srcReg2" : srcReg2 } 183 srcReg2 += 1 184 if byElem: 185 # 2nd operand has to be read fully 186 for reg in range(src2Cnt, 4): 187 eWalkCode += ''' 188 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 189 ''' % { "reg" : reg } 190 if readDest: 191 for reg in range(destCnt): 192 eWalkCode += ''' 193 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 194 ''' % { "reg" : reg } 195 readDestCode = '' 196 if readDest: 197 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 198 scalarCheck = ''' 199 if (i != 0) { 200 destReg.elements[i] = 0; 201 continue; 202 } 203 ''' 204 eWalkCode += ''' 205 for (unsigned i = 0; i < eCount; i++) { 206 %(scalarCheck)s 207 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); 208 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); 209 %(destPrefix)sElement destElem; 210 %(readDest)s 211 %(op)s 212 destReg.elements[i] = htog(destElem); 213 } 214 ''' % { "op" : op, "readDest" : readDestCode, 215 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, 216 "destPrefix" : destPrefix, 217 "scalarCheck" : scalarCheck if scalar else "", 218 "src2Index" : 
"imm" if byElem else "i" } 219 destReg = 0 220 if hi and not bigDest: 221 # narrowing operations 222 destReg = 2 223 for reg in range(destCnt): 224 eWalkCode += ''' 225 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 226 ''' % { "reg" : reg, "destReg": destReg } 227 destReg += 1 228 if destCnt < 4 and not hi: # zero upper half 229 for reg in range(destCnt, 4): 230 eWalkCode += ''' 231 AA64FpDestP%(reg)d_uw = 0; 232 ''' % { "reg" : reg } 233 iop = InstObjParams(name, Name, 234 "DataX2RegImmOp" if byElem else "DataX2RegOp", 235 { "code": eWalkCode, 236 "r_count": 2, 237 "op_class": opClass }, []) 238 if byElem: 239 header_output += NeonX2RegImmOpDeclare.subst(iop) 240 else: 241 header_output += NeonX2RegOpDeclare.subst(iop) 242 exec_output += NeonXUnequalRegOpExecute.subst(iop) 243 for type in types: 244 substDict = { "targs" : type, 245 "class_name" : Name } 246 exec_output += NeonXExecDeclare.subst(substDict) 247 248 def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 249 scalar=False, byElem=False, hi=False): 250 assert not byElem 251 threeUnequalRegInstX(name, Name, opClass, types, op, 252 True, True, False, readDest, scalar, byElem, hi) 253 254 def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, 255 scalar=False, byElem=False, hi=False): 256 threeUnequalRegInstX(name, Name, opClass, types, op, 257 False, False, True, readDest, scalar, byElem, hi) 258 259 def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, 260 scalar=False, byElem=False, hi=False): 261 assert not byElem 262 threeUnequalRegInstX(name, Name, opClass, types, op, 263 True, False, True, readDest, scalar, byElem, hi) 264 265 def twoEqualRegInstX(name, Name, opClass, types, rCount, op, 266 readDest=False, scalar=False, byElem=False, 267 hasImm=False, isDup=False): 268 global header_output, exec_output 269 assert (not isDup) or byElem 270 if byElem: 271 hasImm = True 272 if isDup: 273 eWalkCode = simd64EnabledCheckCode + ''' 274 FullRegVect 
srcReg1; 275 RegVect destReg; 276 ''' 277 else: 278 eWalkCode = simd64EnabledCheckCode + ''' 279 RegVect srcReg1, destReg; 280 ''' 281 for reg in range(4 if isDup else rCount): 282 eWalkCode += ''' 283 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 284 ''' % { "reg" : reg } 285 if readDest: 286 eWalkCode += ''' 287 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 288 ''' % { "reg" : reg } 289 readDestCode = '' 290 if readDest: 291 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 292 scalarCheck = ''' 293 if (i != 0) { 294 destReg.elements[i] = 0; 295 continue; 296 } 297 ''' 298 eWalkCode += ''' 299 for (unsigned i = 0; i < eCount; i++) { 300 %(scalarCheck)s 301 unsigned j = i; 302 Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); 303 Element destElem; 304 %(readDest)s 305 %(op)s 306 destReg.elements[j] = htog(destElem); 307 } 308 ''' % { "op" : op, "readDest" : readDestCode, 309 "scalarCheck" : scalarCheck if scalar else "", 310 "src1Index" : "imm" if byElem else "i" } 311 for reg in range(rCount): 312 eWalkCode += ''' 313 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 314 ''' % { "reg" : reg } 315 if rCount < 4: # zero upper half 316 for reg in range(rCount, 4): 317 eWalkCode += ''' 318 AA64FpDestP%(reg)d_uw = 0; 319 ''' % { "reg" : reg } 320 iop = InstObjParams(name, Name, 321 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 322 { "code": eWalkCode, 323 "r_count": rCount, 324 "op_class": opClass }, []) 325 if hasImm: 326 header_output += NeonX1RegImmOpDeclare.subst(iop) 327 else: 328 header_output += NeonX1RegOpDeclare.subst(iop) 329 exec_output += NeonXEqualRegOpExecute.subst(iop) 330 for type in types: 331 substDict = { "targs" : type, 332 "class_name" : Name } 333 exec_output += NeonXExecDeclare.subst(substDict) 334 335 def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, 336 hi=False, hasImm=False): 337 global header_output, exec_output 338 eWalkCode = simd64EnabledCheckCode + ''' 339 RegVect srcReg1; 340 BigRegVect 
destReg; 341 ''' 342 destReg = 0 if not hi else 2 343 for reg in range(2): 344 eWalkCode += ''' 345 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); 346 ''' % { "reg" : reg, "destReg": destReg } 347 destReg += 1 348 destReg = 0 if not hi else 2 349 if readDest: 350 for reg in range(4): 351 eWalkCode += ''' 352 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 353 ''' % { "reg" : reg } 354 destReg += 1 355 readDestCode = '' 356 if readDest: 357 readDestCode = 'destReg = gtoh(destReg.elements[i]);' 358 eWalkCode += ''' 359 for (unsigned i = 0; i < eCount; i++) { 360 Element srcElem1 = gtoh(srcReg1.elements[i]); 361 BigElement destElem; 362 %(readDest)s 363 %(op)s 364 destReg.elements[i] = htog(destElem); 365 } 366 ''' % { "op" : op, "readDest" : readDestCode } 367 for reg in range(4): 368 eWalkCode += ''' 369 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 370 ''' % { "reg" : reg } 371 iop = InstObjParams(name, Name, 372 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 373 { "code": eWalkCode, 374 "r_count": 2, 375 "op_class": opClass }, []) 376 if hasImm: 377 header_output += NeonX1RegImmOpDeclare.subst(iop) 378 else: 379 header_output += NeonX1RegOpDeclare.subst(iop) 380 exec_output += NeonXUnequalRegOpExecute.subst(iop) 381 for type in types: 382 substDict = { "targs" : type, 383 "class_name" : Name } 384 exec_output += NeonXExecDeclare.subst(substDict) 385 386 def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, 387 scalar=False, hi=False, hasImm=False): 388 global header_output, exec_output 389 eWalkCode = simd64EnabledCheckCode + ''' 390 BigRegVect srcReg1; 391 RegVect destReg; 392 ''' 393 for reg in range(4): 394 eWalkCode += ''' 395 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 396 ''' % { "reg" : reg } 397 if readDest: 398 for reg in range(2): 399 eWalkCode += ''' 400 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 401 ''' % { "reg" : reg } 402 else: 403 eWalkCode += ''' 404 destReg.elements[0] = 0; 405 ''' % { "reg" 
: reg } 406 readDestCode = '' 407 if readDest: 408 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 409 scalarCheck = ''' 410 if (i != 0) { 411 destReg.elements[i] = 0; 412 continue; 413 } 414 ''' 415 eWalkCode += ''' 416 for (unsigned i = 0; i < eCount; i++) { 417 %(scalarCheck)s 418 BigElement srcElem1 = gtoh(srcReg1.elements[i]); 419 Element destElem; 420 %(readDest)s 421 %(op)s 422 destReg.elements[i] = htog(destElem); 423 } 424 ''' % { "op" : op, "readDest" : readDestCode, 425 "scalarCheck" : scalarCheck if scalar else "" } 426 destReg = 0 if not hi else 2 427 for reg in range(2): 428 eWalkCode += ''' 429 AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); 430 ''' % { "reg" : reg, "destReg": destReg } 431 destReg += 1 432 if not hi: 433 for reg in range(2, 4): # zero upper half 434 eWalkCode += ''' 435 AA64FpDestP%(reg)d_uw = 0; 436 ''' % { "reg" : reg } 437 iop = InstObjParams(name, Name, 438 "DataX1RegImmOp" if hasImm else "DataX1RegOp", 439 { "code": eWalkCode, 440 "r_count": 2, 441 "op_class": opClass }, []) 442 if hasImm: 443 header_output += NeonX1RegImmOpDeclare.subst(iop) 444 else: 445 header_output += NeonX1RegOpDeclare.subst(iop) 446 exec_output += NeonXUnequalRegOpExecute.subst(iop) 447 for type in types: 448 substDict = { "targs" : type, 449 "class_name" : Name } 450 exec_output += NeonXExecDeclare.subst(substDict) 451 452 def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): 453 global header_output, exec_output 454 eWalkCode = simd64EnabledCheckCode + ''' 455 RegVect srcReg1, srcReg2, destReg; 456 ''' 457 for reg in range(rCount): 458 eWalkCode += ''' 459 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 460 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 461 ''' % { "reg" : reg } 462 eWalkCode += op 463 for reg in range(rCount): 464 eWalkCode += ''' 465 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 466 ''' % { "reg" : reg } 467 if rCount < 4: 468 for reg in range(rCount, 4): 469 eWalkCode += ''' 470 
AA64FpDestP%(reg)d_uw = 0; 471 ''' % { "reg" : reg } 472 iop = InstObjParams(name, Name, 473 "DataX2RegOp", 474 { "code": eWalkCode, 475 "r_count": rCount, 476 "op_class": opClass }, []) 477 header_output += NeonX2RegOpDeclare.subst(iop) 478 exec_output += NeonXEqualRegOpExecute.subst(iop) 479 for type in types: 480 substDict = { "targs" : type, 481 "class_name" : Name } 482 exec_output += NeonXExecDeclare.subst(substDict) 483 484 def insFromVecElemInstX(name, Name, opClass, types, rCount): 485 global header_output, exec_output 486 eWalkCode = simd64EnabledCheckCode + ''' 487 FullRegVect srcReg1; 488 RegVect destReg; 489 ''' 490 for reg in range(4): 491 eWalkCode += ''' 492 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 493 ''' % { "reg" : reg } 494 for reg in range(rCount): 495 eWalkCode += ''' 496 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 497 ''' % { "reg" : reg } 498 eWalkCode += ''' 499 Element srcElem1 = gtoh(srcReg1.elements[imm2]); 500 Element destElem = srcElem1; 501 destReg.elements[imm1] = htog(destElem); 502 ''' 503 for reg in range(rCount): 504 eWalkCode += ''' 505 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 506 ''' % { "reg" : reg } 507 iop = InstObjParams(name, Name, 508 "DataX1Reg2ImmOp", 509 { "code": eWalkCode, 510 "r_count": rCount, 511 "op_class": opClass }, []) 512 header_output += NeonX1Reg2ImmOpDeclare.subst(iop) 513 exec_output += NeonXEqualRegOpExecute.subst(iop) 514 for type in types: 515 substDict = { "targs" : type, 516 "class_name" : Name } 517 exec_output += NeonXExecDeclare.subst(substDict) 518 519 def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): 520 global header_output, exec_output 521 eWalkCode = simd64EnabledCheckCode + ''' 522 RegVect srcReg1, destReg; 523 ''' 524 for reg in range(rCount): 525 eWalkCode += ''' 526 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 527 ''' % { "reg" : reg } 528 eWalkCode += ''' 529 Element srcElem1 = gtoh(srcReg1.elements[0]); 530 Element srcElem2 = 
gtoh(srcReg1.elements[1]); 531 Element destElem; 532 %(op)s 533 destReg.elements[0] = htog(destElem); 534 ''' % { "op" : op } 535 destCnt = rCount / 2 536 for reg in range(destCnt): 537 eWalkCode += ''' 538 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 539 ''' % { "reg" : reg } 540 for reg in range(destCnt, 4): # zero upper half 541 eWalkCode += ''' 542 AA64FpDestP%(reg)d_uw = 0; 543 ''' % { "reg" : reg } 544 iop = InstObjParams(name, Name, 545 "DataX1RegOp", 546 { "code": eWalkCode, 547 "r_count": rCount, 548 "op_class": opClass }, []) 549 header_output += NeonX1RegOpDeclare.subst(iop) 550 exec_output += NeonXEqualRegOpExecute.subst(iop) 551 for type in types: 552 substDict = { "targs" : type, 553 "class_name" : Name } 554 exec_output += NeonXExecDeclare.subst(substDict) 555 556 def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, 557 doubleDest=False, long=False): 558 global header_output, exec_output 559 destPrefix = "Big" if long else "" 560 eWalkCode = simd64EnabledCheckCode + ''' 561 RegVect srcReg1; 562 %sRegVect destReg; 563 ''' % destPrefix 564 for reg in range(rCount): 565 eWalkCode += ''' 566 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 567 ''' % { "reg" : reg } 568 eWalkCode += ''' 569 destReg.regs[0] = 0; 570 %(destPrefix)sElement destElem = 0; 571 for (unsigned i = 0; i < eCount; i++) { 572 Element srcElem1 = gtoh(srcReg1.elements[i]); 573 if (i == 0) { 574 destElem = srcElem1; 575 } else { 576 %(op)s 577 } 578 } 579 destReg.elements[0] = htog(destElem); 580 ''' % { "op" : op, "destPrefix" : destPrefix } 581 destCnt = 2 if doubleDest else 1 582 for reg in range(destCnt): 583 eWalkCode += ''' 584 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 585 ''' % { "reg" : reg } 586 for reg in range(destCnt, 4): # zero upper half 587 eWalkCode += ''' 588 AA64FpDestP%(reg)d_uw = 0; 589 ''' % { "reg" : reg } 590 iop = InstObjParams(name, Name, 591 "DataX1RegOp", 592 { "code": eWalkCode, 593 "r_count": rCount, 594 "op_class": opClass }, 
[]) 595 header_output += NeonX1RegOpDeclare.subst(iop) 596 if long: 597 exec_output += NeonXUnequalRegOpExecute.subst(iop) 598 else: 599 exec_output += NeonXEqualRegOpExecute.subst(iop) 600 for type in types: 601 substDict = { "targs" : type, 602 "class_name" : Name } 603 exec_output += NeonXExecDeclare.subst(substDict) 604 605 def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, 606 readDest=False): 607 global header_output, exec_output 608 eWalkCode = simd64EnabledCheckCode + ''' 609 RegVect srcRegs; 610 BigRegVect destReg; 611 ''' 612 for reg in range(rCount): 613 eWalkCode += ''' 614 srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 615 ''' % { "reg" : reg } 616 if readDest: 617 eWalkCode += ''' 618 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 619 ''' % { "reg" : reg } 620 readDestCode = '' 621 if readDest: 622 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 623 eWalkCode += ''' 624 for (unsigned i = 0; i < eCount / 2; i++) { 625 Element srcElem1 = gtoh(srcRegs.elements[2 * i]); 626 Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); 627 BigElement destElem; 628 %(readDest)s 629 %(op)s 630 destReg.elements[i] = htog(destElem); 631 } 632 ''' % { "op" : op, "readDest" : readDestCode } 633 for reg in range(rCount): 634 eWalkCode += ''' 635 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 636 ''' % { "reg" : reg } 637 if rCount < 4: # zero upper half 638 for reg in range(rCount, 4): 639 eWalkCode += ''' 640 AA64FpDestP%(reg)d_uw = 0; 641 ''' % { "reg" : reg } 642 iop = InstObjParams(name, Name, 643 "DataX1RegOp", 644 { "code": eWalkCode, 645 "r_count": rCount, 646 "op_class": opClass }, []) 647 header_output += NeonX1RegOpDeclare.subst(iop) 648 exec_output += NeonXUnequalRegOpExecute.subst(iop) 649 for type in types: 650 substDict = { "targs" : type, 651 "class_name" : Name } 652 exec_output += NeonXExecDeclare.subst(substDict) 653 654 def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): 655 global 
header_output, exec_output 656 eWalkCode = simd64EnabledCheckCode + ''' 657 RegVect destReg; 658 ''' 659 if readDest: 660 for reg in range(rCount): 661 eWalkCode += ''' 662 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 663 ''' % { "reg" : reg } 664 readDestCode = '' 665 if readDest: 666 readDestCode = 'destElem = gtoh(destReg.elements[i]);' 667 eWalkCode += ''' 668 for (unsigned i = 0; i < eCount; i++) { 669 Element destElem; 670 %(readDest)s 671 %(op)s 672 destReg.elements[i] = htog(destElem); 673 } 674 ''' % { "op" : op, "readDest" : readDestCode } 675 for reg in range(rCount): 676 eWalkCode += ''' 677 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 678 ''' % { "reg" : reg } 679 if rCount < 4: # zero upper half 680 for reg in range(rCount, 4): 681 eWalkCode += ''' 682 AA64FpDestP%(reg)d_uw = 0; 683 ''' % { "reg" : reg } 684 iop = InstObjParams(name, Name, 685 "DataXImmOnlyOp", 686 { "code": eWalkCode, 687 "r_count": rCount, 688 "op_class": opClass }, []) 689 header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) 690 exec_output += NeonXEqualRegOpExecute.subst(iop) 691 for type in types: 692 substDict = { "targs" : type, 693 "class_name" : Name } 694 exec_output += NeonXExecDeclare.subst(substDict) 695 696 def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): 697 global header_output, exec_output 698 eWalkCode = simd64EnabledCheckCode + ''' 699 RegVect destReg; 700 for (unsigned i = 0; i < eCount; i++) { 701 destReg.elements[i] = htog((Element) %sOp1); 702 } 703 ''' % gprSpec 704 for reg in range(rCount): 705 eWalkCode += ''' 706 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 707 ''' % { "reg" : reg } 708 if rCount < 4: # zero upper half 709 for reg in range(rCount, 4): 710 eWalkCode += ''' 711 AA64FpDestP%(reg)d_uw = 0; 712 ''' % { "reg" : reg } 713 iop = InstObjParams(name, Name, 714 "DataX1RegOp", 715 { "code": eWalkCode, 716 "r_count": rCount, 717 "op_class": opClass }, []) 718 header_output += NeonX1RegOpDeclare.subst(iop) 719 
exec_output += NeonXEqualRegOpExecute.subst(iop) 720 for type in types: 721 substDict = { "targs" : type, 722 "class_name" : Name } 723 exec_output += NeonXExecDeclare.subst(substDict) 724 725 def extInstX(name, Name, opClass, types, rCount, op): 726 global header_output, exec_output 727 eWalkCode = simd64EnabledCheckCode + ''' 728 RegVect srcReg1, srcReg2, destReg; 729 ''' 730 for reg in range(rCount): 731 eWalkCode += ''' 732 srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 733 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 734 ''' % { "reg" : reg } 735 eWalkCode += op 736 for reg in range(rCount): 737 eWalkCode += ''' 738 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 739 ''' % { "reg" : reg } 740 if rCount < 4: # zero upper half 741 for reg in range(rCount, 4): 742 eWalkCode += ''' 743 AA64FpDestP%(reg)d_uw = 0; 744 ''' % { "reg" : reg } 745 iop = InstObjParams(name, Name, 746 "DataX2RegImmOp", 747 { "code": eWalkCode, 748 "r_count": rCount, 749 "op_class": opClass }, []) 750 header_output += NeonX2RegImmOpDeclare.subst(iop) 751 exec_output += NeonXEqualRegOpExecute.subst(iop) 752 for type in types: 753 substDict = { "targs" : type, 754 "class_name" : Name } 755 exec_output += NeonXExecDeclare.subst(substDict) 756 757 def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): 758 global header_output, exec_output 759 eWalkCode = simd64EnabledCheckCode + ''' 760 RegVect destReg; 761 ''' 762 for reg in range(rCount): 763 eWalkCode += ''' 764 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 765 ''' % { "reg" : reg } 766 eWalkCode += ''' 767 destReg.elements[imm] = htog((Element) %sOp1); 768 ''' % gprSpec 769 for reg in range(rCount): 770 eWalkCode += ''' 771 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 772 ''' % { "reg" : reg } 773 iop = InstObjParams(name, Name, 774 "DataX1RegImmOp", 775 { "code": eWalkCode, 776 "r_count": rCount, 777 "op_class": opClass }, []) 778 header_output += NeonX1RegImmOpDeclare.subst(iop) 779 exec_output 
+= NeonXEqualRegOpExecute.subst(iop) 780 for type in types: 781 substDict = { "targs" : type, 782 "class_name" : Name } 783 exec_output += NeonXExecDeclare.subst(substDict) 784 785 def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, 786 signExt=False): 787 global header_output, exec_output 788 eWalkCode = simd64EnabledCheckCode + ''' 789 FullRegVect srcReg; 790 ''' 791 for reg in range(4): 792 eWalkCode += ''' 793 srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); 794 ''' % { "reg" : reg } 795 if signExt: 796 eWalkCode += ''' 797 %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); 798 ''' % gprSpec 799 else: 800 eWalkCode += ''' 801 %sDest = srcReg.elements[imm]; 802 ''' % gprSpec 803 iop = InstObjParams(name, Name, 804 "DataX1RegImmOp", 805 { "code": eWalkCode, 806 "r_count": rCount, 807 "op_class": opClass }, []) 808 header_output += NeonX1RegImmOpDeclare.subst(iop) 809 exec_output += NeonXEqualRegOpExecute.subst(iop) 810 for type in types: 811 substDict = { "targs" : type, 812 "class_name" : Name } 813 exec_output += NeonXExecDeclare.subst(substDict) 814 815 def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): 816 global header_output, decoder_output, exec_output 817 code = simd64EnabledCheckCode + ''' 818 union 819 { 820 uint8_t bytes[64]; 821 FloatRegBits regs[16]; 822 } table; 823 824 union 825 { 826 uint8_t bytes[%(rCount)d * 4]; 827 FloatRegBits regs[%(rCount)d]; 828 } destReg, srcReg2; 829 830 const unsigned length = %(length)d; 831 const bool isTbl = %(isTbl)s; 832 ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } 833 for reg in range(rCount): 834 code += ''' 835 srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); 836 destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); 837 ''' % { "reg" : reg } 838 for reg in range(16): 839 if reg < length * 4: 840 code += ''' 841 table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); 842 ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } 843 else: 844 code += ''' 845 
table.regs[%(reg)d] = 0; 846 ''' % { "reg" : reg } 847 code += ''' 848 for (unsigned i = 0; i < sizeof(destReg); i++) { 849 uint8_t index = srcReg2.bytes[i]; 850 if (index < 16 * length) { 851 destReg.bytes[i] = table.bytes[index]; 852 } else { 853 if (isTbl) 854 destReg.bytes[i] = 0; 855 // else destReg.bytes[i] unchanged 856 } 857 } 858 ''' 859 for reg in range(rCount): 860 code += ''' 861 AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); 862 ''' % { "reg" : reg } 863 if rCount < 4: # zero upper half 864 for reg in range(rCount, 4): 865 code += ''' 866 AA64FpDestP%(reg)d_uw = 0; 867 ''' % { "reg" : reg } 868 iop = InstObjParams(name, Name, 869 "DataX2RegOp", 870 { "code": code, 871 "r_count": rCount, 872 "op_class": opClass }, []) 873 header_output += NeonX2RegOpDeclare.subst(iop) 874 exec_output += NeonXEqualRegOpExecute.subst(iop) 875 for type in types: 876 substDict = { "targs" : type, 877 "class_name" : Name } 878 exec_output += NeonXExecDeclare.subst(substDict) 879 880 # ABS 881 absCode = ''' 882 if (srcElem1 < 0) { 883 destElem = -srcElem1; 884 } else { 885 destElem = srcElem1; 886 } 887 ''' 888 twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) 889 twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) 890 # ADD 891 addCode = "destElem = srcElem1 + srcElem2;" 892 threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) 893 threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) 894 # ADDHN, ADDHN2 895 addhnCode = ''' 896 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> 897 (sizeof(Element) * 8); 898 ''' 899 threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, 900 addhnCode) 901 threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, 902 addhnCode, hi=True) 903 # ADDP (scalar) 904 twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, 905 addCode) 906 # ADDP (vector) 907 threeEqualRegInstX("addp", "AddpDX", 
"SimdAddOp", smallUnsignedTypes, 2, 908 addCode, pairwise=True) 909 threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, 910 addCode, pairwise=True) 911 # ADDV 912 # Note: SimdAddOp can be a bit optimistic here 913 addAcrossCode = "destElem += srcElem1;" 914 twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), 915 2, addAcrossCode) 916 twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, 917 addAcrossCode) 918 # AND 919 andCode = "destElem = srcElem1 & srcElem2;" 920 threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode) 921 threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode) 922 # BIC (immediate) 923 bicImmCode = "destElem &= ~imm;" 924 oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, 925 bicImmCode, True) 926 oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, 927 bicImmCode, True) 928 # BIC (register) 929 bicCode = "destElem = srcElem1 & ~srcElem2;" 930 threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode) 931 threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode) 932 # BIF 933 bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);" 934 threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, 935 True) 936 threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, 937 True) 938 # BIT 939 bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);" 940 threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, 941 True) 942 threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, 943 True) 944 # BSL 945 bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);" 946 threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, 947 True) 948 threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, 949 True) 950 # CLS 951 clsCode = ''' 952 unsigned count = 0; 953 if 
(srcElem1 < 0) { 954 srcElem1 <<= 1; 955 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { 956 count++; 957 srcElem1 <<= 1; 958 } 959 } else { 960 srcElem1 <<= 1; 961 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { 962 count++; 963 srcElem1 <<= 1; 964 } 965 } 966 destElem = count; 967 ''' 968 twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode) 969 twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode) 970 # CLZ 971 clzCode = ''' 972 unsigned count = 0; 973 while (srcElem1 >= 0 && count < sizeof(Element) * 8) { 974 count++; 975 srcElem1 <<= 1; 976 } 977 destElem = count; 978 ''' 979 twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode) 980 twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode) 981 # CMEQ (register) 982 cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;" 983 threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, 984 cmeqCode) 985 threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, 986 cmeqCode) 987 # CMEQ (zero) 988 cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;" 989 twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, 990 cmeqZeroCode) 991 twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, 992 cmeqZeroCode) 993 # CMGE (register) 994 cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;" 995 threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode) 996 threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode) 997 # CMGE (zero) 998 cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;" 999 twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, 1000 cmgeZeroCode) 1001 twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, 1002 cmgeZeroCode) 1003 # CMGT (register) 1004 cmgtCode = "destElem = (srcElem1 > srcElem2) ? 
(Element)(-1) : 0;" 1005 threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode) 1006 threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode) 1007 # CMGT (zero) 1008 cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;" 1009 twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, 1010 cmgtZeroCode) 1011 twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, 1012 cmgtZeroCode) 1013 # CMHI (register) 1014 threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, 1015 cmgtCode) 1016 threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, 1017 cmgtCode) 1018 # CMHS (register) 1019 threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, 1020 cmgeCode) 1021 threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, 1022 cmgeCode) 1023 # CMLE (zero) 1024 cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;" 1025 twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, 1026 cmleZeroCode) 1027 twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, 1028 cmleZeroCode) 1029 # CMLT (zero) 1030 cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;" 1031 twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, 1032 cmltZeroCode) 1033 twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, 1034 cmltZeroCode) 1035 # CMTST (register) 1036 tstCode = "destElem = (srcElem1 & srcElem2) ? 
(Element)(-1) : 0;" 1037 threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, 1038 tstCode) 1039 threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, 1040 tstCode) 1041 # CNT 1042 cntCode = ''' 1043 unsigned count = 0; 1044 while (srcElem1 && count < sizeof(Element) * 8) { 1045 count += srcElem1 & 0x1; 1046 srcElem1 >>= 1; 1047 } 1048 destElem = count; 1049 ''' 1050 twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode) 1051 twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode) 1052 # DUP (element) 1053 dupCode = "destElem = srcElem1;" 1054 twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, 1055 dupCode, isDup=True, byElem=True) 1056 twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, 1057 dupCode, isDup=True, byElem=True) 1058 twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, 1059 dupCode, isDup=True, byElem=True, scalar=True) 1060 # DUP (general register) 1061 dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W') 1062 dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 1063 dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X') 1064 # EOR 1065 eorCode = "destElem = srcElem1 ^ srcElem2;" 1066 threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode) 1067 threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode) 1068 # EXT 1069 extCode = ''' 1070 for (unsigned i = 0; i < eCount; i++) { 1071 unsigned index = i + imm; 1072 if (index < eCount) { 1073 destReg.elements[i] = srcReg1.elements[index]; 1074 } else { 1075 index -= eCount; 1076 if (index >= eCount) { 1077 fault = std::make_shared<UndefinedInstruction>( 1078 machInst, false, mnemonic); 1079 } else { 1080 destReg.elements[i] = srcReg2.elements[index]; 1081 } 1082 } 1083 } 1084 ''' 1085 extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode) 1086 extInstX("Ext", "ExtQX", "SimdMiscOp", 
("uint8_t",), 4, extCode) 1087 # FABD 1088 fpOp = ''' 1089 FPSCR fpscr = (FPSCR) FpscrExc; 1090 destElem = %s; 1091 FpscrExc = fpscr; 1092 ''' 1093 fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))" 1094 threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2, 1095 fabdCode) 1096 threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4, 1097 fabdCode) 1098 threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4, 1099 fabdCode, scalar=True) 1100 # FABS 1101 fabsCode = fpOp % "fplibAbs<Element>(srcElem1)" 1102 twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2, 1103 fabsCode) 1104 twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4, 1105 fabsCode) 1106 # FACGE 1107 fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1)," 1108 " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0") 1109 facgeCode = fpCmpAbsOp % "GE" 1110 threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes, 1111 2, facgeCode) 1112 threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4, 1113 facgeCode) 1114 threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4, 1115 facgeCode, scalar=True) 1116 # FACGT 1117 facgtCode = fpCmpAbsOp % "GT" 1118 threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes, 1119 2, facgtCode) 1120 threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4, 1121 facgtCode) 1122 threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4, 1123 facgtCode, scalar=True) 1124 # FADD 1125 fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)" 1126 faddCode = fpBinOp % "Add" 1127 threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2, 1128 faddCode) 1129 threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, 1130 faddCode) 1131 # FADDP (scalar) 1132 twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp", 1133 ("uint32_t",), 2, 
faddCode) 1134 twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp", 1135 ("uint64_t",), 4, faddCode) 1136 # FADDP (vector) 1137 threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, 1138 2, faddCode, pairwise=True) 1139 threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4, 1140 faddCode, pairwise=True) 1141 # FCMEQ (register) 1142 fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?" 1143 " -1 : 0") 1144 fcmeqCode = fpCmpOp % "EQ" 1145 threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, 1146 2, fcmeqCode) 1147 threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, 1148 fcmeqCode) 1149 threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, 1150 fcmeqCode, scalar=True) 1151 # FCMEQ (zero) 1152 fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0" 1153 fcmeqZeroCode = fpCmpZeroOp % "EQ" 1154 twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1155 2, fcmeqZeroCode) 1156 twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1157 fcmeqZeroCode) 1158 twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1159 fcmeqZeroCode, scalar=True) 1160 # FCMGE (register) 1161 fcmgeCode = fpCmpOp % "GE" 1162 threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, 1163 2, fcmgeCode) 1164 threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, 1165 fcmgeCode) 1166 threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, 1167 fcmgeCode, scalar=True) 1168 # FCMGE (zero) 1169 fcmgeZeroCode = fpCmpZeroOp % "GE" 1170 twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1171 2, fcmgeZeroCode) 1172 twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1173 fcmgeZeroCode) 1174 twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1175 fcmgeZeroCode, scalar=True) 1176 # FCMGT 
(register) 1177 fcmgtCode = fpCmpOp % "GT" 1178 threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, 1179 2, fcmgtCode) 1180 threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4, 1181 fcmgtCode) 1182 threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4, 1183 fcmgtCode, scalar=True) 1184 # FCMGT (zero) 1185 fcmgtZeroCode = fpCmpZeroOp % "GT" 1186 twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1187 2, fcmgtZeroCode) 1188 twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1189 fcmgtZeroCode) 1190 twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1191 fcmgtZeroCode, scalar=True) 1192 # FCMLE (zero) 1193 fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?" 1194 " -1 : 0") 1195 fcmleZeroCode = fpCmpRevZeroOp % "GE" 1196 twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1197 2, fcmleZeroCode) 1198 twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1199 fcmleZeroCode) 1200 twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1201 fcmleZeroCode, scalar=True) 1202 # FCMLT (zero) 1203 fcmltZeroCode = fpCmpRevZeroOp % "GT" 1204 twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes, 1205 2, fcmltZeroCode) 1206 twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4, 1207 fcmltZeroCode) 1208 twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4, 1209 fcmltZeroCode, scalar=True) 1210 # FCVTAS 1211 fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>(" 1212 "srcElem1, %s, %s, %s, fpscr)") 1213 fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY") 1214 twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2, 1215 fcvtasCode) 1216 twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4, 1217 fcvtasCode) 1218 twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 
4, 1219 fcvtasCode, scalar=True) 1220 # FCVTAU 1221 fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY") 1222 twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2, 1223 fcvtauCode) 1224 twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4, 1225 fcvtauCode) 1226 twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4, 1227 fcvtauCode, scalar=True) 1228 # FCVTL, FCVTL2 1229 fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>(" 1230 "srcElem1, FPCRRounding(fpscr), fpscr)") 1231 twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"), 1232 fcvtlCode) 1233 twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"), 1234 fcvtlCode, hi=True) 1235 # FCVTMS 1236 fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF") 1237 twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2, 1238 fcvtmsCode) 1239 twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4, 1240 fcvtmsCode) 1241 twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4, 1242 fcvtmsCode, scalar=True) 1243 # FCVTMU 1244 fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF") 1245 twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2, 1246 fcvtmuCode) 1247 twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4, 1248 fcvtmuCode) 1249 twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4, 1250 fcvtmuCode, scalar=True) 1251 # FCVTN, FCVTN2 1252 fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>(" 1253 "srcElem1, FPCRRounding(fpscr), fpscr)") 1254 twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", 1255 ("uint16_t", "uint32_t"), fcvtnCode) 1256 twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", 1257 ("uint16_t", "uint32_t"), fcvtnCode, hi=True) 1258 # FCVTNS 1259 fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN") 1260 twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2, 1261 fcvtnsCode) 1262 twoEqualRegInstX("fcvtns", 
"FcvtnsQX", "SimdCvtOp", floatTypes, 4, 1263 fcvtnsCode) 1264 twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4, 1265 fcvtnsCode, scalar=True) 1266 # FCVTNU 1267 fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN") 1268 twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2, 1269 fcvtnuCode) 1270 twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4, 1271 fcvtnuCode) 1272 twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4, 1273 fcvtnuCode, scalar=True) 1274 # FCVTPS 1275 fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF") 1276 twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2, 1277 fcvtpsCode) 1278 twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4, 1279 fcvtpsCode) 1280 twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4, 1281 fcvtpsCode, scalar=True) 1282 # FCVTPU 1283 fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF") 1284 twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2, 1285 fcvtpuCode) 1286 twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4, 1287 fcvtpuCode) 1288 twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4, 1289 fcvtpuCode, scalar=True) 1290 # FCVTXN, FCVTXN2 1291 fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>(" 1292 "srcElem1, FPRounding_ODD, fpscr)") 1293 twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes, 1294 fcvtxnCode) 1295 twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes, 1296 fcvtxnCode, hi=True) 1297 twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes, 1298 fcvtxnCode, scalar=True) 1299 # FCVTZS (fixed-point) 1300 fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO") 1301 twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes, 1302 2, fcvtzsCode, hasImm=True) 1303 twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4, 1304 fcvtzsCode, hasImm=True) 1305 
# FCVTZS (fixed-point), ScX variant
twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsCode, hasImm=True, scalar=True)
# FCVTZS (integer)
# Same conversion template as the fixed-point form, with "0" substituted
# where that form passes "imm".
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes,
                 2, fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode, scalar=True)
# FCVTZU (fixed-point)
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes,
                 2, fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True, scalar=True)
# FCVTZU (integer)
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode, scalar=True)
# FDIV
fdivCode = fpBinOp % "Div"
threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2,
                   fdivCode)
threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4,
                   fdivCode)
# FMAX
fmaxCode = fpBinOp % "Max"
threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                   fmaxCode)
threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxCode)
# FMAXNM
fmaxnmCode = fpBinOp % "MaxNum"
threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fmaxnmCode)
threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxnmCode)
# FMAXNMP (scalar)
twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fmaxnmCode)
twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fmaxnmCode)
# FMAXNMP (vector)
threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp",
                   smallFloatTypes, 2, fmaxnmCode, pairwise=True)
threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxnmCode, pairwise=True)
# FMAXNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
# fpAcrossOp folds each source element into destElem, so destElem is both
# an input and the output of the fplib call.
fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fmaxnmAcrossCode)
# FMAXP (scalar)
twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fmaxCode)
twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fmaxCode)
# FMAXP (vector)
threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fmaxCode, pairwise=True)
threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fmaxCode, pairwise=True)
# FMAXV
# Note: SimdFloatCmpOp can be a bit optimistic here
fmaxAcrossCode = fpAcrossOp % "Max"
twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fmaxAcrossCode)
# FMIN
fminCode = fpBinOp % "Min"
threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2,
                   fminCode)
threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminCode)
# FMINNM
fminnmCode = fpBinOp % "MinNum"
threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fminnmCode)
threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminnmCode)
# FMINNMP (scalar)
twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fminnmCode)
twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fminnmCode)
# FMINNMP (vector)
threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp",
                   smallFloatTypes, 2, fminnmCode, pairwise=True)
threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminnmCode, pairwise=True)
# FMINNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminnmAcrossCode = fpAcrossOp % "MinNum"
twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fminnmAcrossCode)
# FMINP (scalar)
twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp",
                      ("uint32_t",), 2, fminCode)
twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp",
                      ("uint64_t",), 4, fminCode)
# FMINP (vector)
threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes,
                   2, fminCode, pairwise=True)
threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4,
                   fminCode, pairwise=True)
# FMINV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminAcrossCode = fpAcrossOp % "Min"
twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fminAcrossCode)
# FMLA (by element)
# The multiply-accumulate reads destElem as the addend, hence
# readDest=True on every FMLA variant.
fmlaCode = fpOp % ("fplibMulAdd<Element>("
                   "destElem, srcElem1, srcElem2, fpscr)")
threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, fmlaCode, readDest=True,
                   byElem=True)
threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, readDest=True, byElem=True)
threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, readDest=True, byElem=True, scalar=True)
# FMLA (vector)
threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
                   2, fmlaCode, readDest=True)
# FMLA (vector), QX variant.  The snippet reads destElem as the addend,
# hence readDest=True.
threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
                   fmlaCode, readDest=True)
# FMLS (by element)
# Multiply-subtract is expressed as a multiply-add with the first
# multiplicand negated.
fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
                   " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, fmlsCode, readDest=True,
                   byElem=True)
threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, readDest=True, byElem=True)
threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, readDest=True, byElem=True, scalar=True)
# FMLS (vector)
threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes,
                   2, fmlsCode, readDest=True)
threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4,
                   fmlsCode, readDest=True)
# FMOV
fmovCode = 'destElem = imm;'
oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2,
               fmovCode)
oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode)
# FMUL (by element)
fmulCode = fpBinOp % "Mul"
threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp",
                   smallFloatTypes, 2, fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True, scalar=True)
# FMUL (vector)
threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2,
                   fmulCode)
threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode)
# FMULX
fmulxCode = fpBinOp % "MulX"
threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes,
                   2, fmulxCode)
threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode)
threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode, scalar=True)
# FMULX (by element)
threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp",
                   smallFloatTypes, 2, fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True, scalar=True)
# FNEG
# NOTE(review): the first argument is "Neg" rather than "fneg" -- confirm
# whether that is intended.
fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fnegCode)
twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
                 fnegCode)
# FRECPE
frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp",
                 smallFloatTypes, 2, frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
                 4, frecpeCode, scalar=True)
# FRECPS
frecpsCode = fpBinOp % "RecipStepFused"
threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp",
                   smallFloatTypes, 2, frecpsCode)
threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode)
threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode, scalar=True)
# FRECPX
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpxCode, scalar=True)
# FRINTA
# frintCode takes two further substitutions: the rounding mode and a
# boolean that is "true" only for FRINTX (presumably the "exact" flag of
# fplibRoundInt -- confirm against its declaration).
frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintaCode)
twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4,
                 frintaCode)
# FRINTI
# Rounds using the mode currently selected in fpscr.
frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
# FRINTI (instruction definitions; frintiCode is built just above)
twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintiCode)
twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4,
                 frintiCode)
# FRINTM
frintmCode = frintCode % ("FPRounding_NEGINF", "false")
twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintmCode)
twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4,
                 frintmCode)
# FRINTN
frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintnCode)
twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4,
                 frintnCode)
# FRINTP
frintpCode = frintCode % ("FPRounding_POSINF", "false")
twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintpCode)
twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4,
                 frintpCode)
# FRINTX
# Same rounding-mode source as FRINTI, but with the second template
# argument set to "true".
frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintxCode)
twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4,
                 frintxCode)
# FRINTZ
frintzCode = frintCode % ("FPRounding_ZERO", "false")
twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2,
                 frintzCode)
twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4,
                 frintzCode)
# FRSQRTE
frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp",
                 smallFloatTypes, 2, frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode, scalar=True)
# FRSQRTS
frsqrtsCode = fpBinOp % "RSqrtStepFused"
threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp",
                   smallFloatTypes, 2, frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode, scalar=True)
# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2,
                 fsqrtCode)
twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4,
                 fsqrtCode)
# FSUB
fsubCode = fpBinOp % "Sub"
threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2,
                   fsubCode)
threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4,
                   fsubCode)
# INS (element)
insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
# INS (general register)
insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4,
                'W')
insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X')
# MLA (by element)
# Integer multiply-accumulate updates destElem in place, hence
# readDest=True on every MLA / MLS variant.
mlaCode = "destElem += srcElem1 * srcElem2;"
threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 2, mlaCode, readDest=True,
                   byElem=True)
threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 4, mlaCode, readDest=True,
                   byElem=True)
# MLA (vector)
threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                   mlaCode, readDest=True)
threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                   mlaCode, readDest=True)
# MLS (by element)
mlsCode = "destElem -= srcElem1 * srcElem2;"
threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 2, mlsCode, readDest=True,
                   byElem=True)
threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp",
                   ("uint16_t", "uint32_t"), 4, mlsCode, readDest=True,
                   byElem=True)
# MLS (vector)
threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2,
                   mlsCode, readDest=True)
threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4,
                   mlsCode, readDest=True)
# MOV (element) -> alias to INS (element)
# MOV (from general) -> alias to INS (general register)
# MOV (scalar) -> alias to DUP (element)
# MOV (to general) -> alias to UMOV
# MOV (vector) -> alias to ORR (register)
# MOVI
movImmCode = "destElem = imm;"
oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2,
               movImmCode)
oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4,
               movImmCode)
# MUL (by element)
mulCode = "destElem = srcElem1 * srcElem2;"
threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp",
                   ("uint16_t", "uint32_t"), 2, mulCode, byElem=True)
threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp",
                   ("uint16_t", "uint32_t"), 4, mulCode, byElem=True)
# MUL (vector)
threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2,
                   mulCode)
threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4,
                   mulCode)
# MVN
mvnCode = "destElem = ~srcElem1;"
twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode)
twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode)
# MVNI
mvniCode = "destElem = ~imm;"
oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode)
oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode)
# NEG
negCode = "destElem = -srcElem1;"
twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode)
twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode)
# NOT -> alias to MVN
# ORN
ornCode = "destElem = srcElem1 | ~srcElem2;"
threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode)
threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode)
# ORR (immediate)
# ORR (immediate): the snippet updates destElem in place; the trailing True
# is presumably the helper's read-destination flag -- confirm against
# oneRegImmInstX.
orrImmCode = "destElem |= imm;"
oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2,
               orrImmCode, True)
oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4,
               orrImmCode, True)
# ORR (register)
orrCode = "destElem = srcElem1 | srcElem2;"
threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode)
threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode)
# PMUL
# Carry-less (XOR-accumulating) multiply: for every set bit j of srcElem2,
# XOR in srcElem1 shifted left by j.  The result is truncated to Element.
pmulCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2,
                   pmulCode)
threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4,
                   pmulCode)
# PMULL, PMULL2
# Note: 64-bit PMULL is not available (Crypto. Extension)
# Same loop as PMUL, but srcElem1 is widened to BigElement before shifting
# so the full-width product is kept.
pmullCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",),
                  pmullCode, hi=True)
# RADDHN, RADDHN2
# Adds the operands plus a rounding constant of half an Element's range
# (1 << (Element bits - 1)) and keeps the part above the low Element bits.
raddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode)
threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
                    raddhnCode, hi=True)
# RBIT
# Reverses the bit order: bit i of the source lands in bit
# (Element bits - 1 - i) of the destination.
rbitCode = '''
    destElem = 0;
    Element temp = srcElem1;
    for (int i = 0; i < 8 * sizeof(Element); i++) {
        destElem = destElem | ((temp & 0x1) <<
                               (8 * sizeof(Element) - 1 - i));
        temp >>= 1;
    }
'''
twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
# REV16
# The three REV snippets copy the element unchanged and set `j` to the
# element index `i` with its low bits flipped, where the flipped bits span
# one group of (1 << n) bytes.
# NOTE(review): `j` is presumably the destination element index used by
# twoEqualRegInstX -- confirm against that helper.
rev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
                 rev16Code)
twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
                 rev16Code)
# REV32
rev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 2, rev32Code)
twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 4, rev32Code)
# REV64
rev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
                 rev64Code)
twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
                 rev64Code)
# RSHRN, RSHRN2
# Rounding right shift: the last bit shifted out (bit imm - 1) is added back
# to the shifted value.  The shift is done in two steps so that a shift by
# the full source width stays well defined.
rshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True)
twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True, hi=True)
# RSUBHN, RSUBHN2
# Subtract in BigElement precision, add a rounding constant of half an
# Element range, then keep the bits above the low sizeof(Element) * 8.
rsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
                    rsubhnCode)
threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
                    rsubhnCode, hi=True)
# SABA
# Accumulate the absolute difference of the two sources into destElem.
abaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
                   abaCode, True)
# SABAL, SABAL2
# As SABA, with the difference computed in BigElement precision.
abalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True)
threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True, hi=True)
# SABD
# Absolute difference of the two sources (no accumulation).
abdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
                   abdCode)
threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
                   abdCode)
# SABDL, SABDL2
abdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
# NOTE(review): abdlCode overwrites destElem without reading it, yet these
# calls pass the same positional True that the accumulating SABAL calls pass
# (presumably readDest) -- confirm whether the destination read is needed.
threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True)
threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True, hi=True)
# SADALP
# Accumulate the widened sum of a source pair into destElem.
adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
                    adalpCode, True)
twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
                    adalpCode, True)
# SADDL, SADDL2
# addlwCode is shared by SADDL, SADDLP and SADDW below.
addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SADDLP
twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
                    addlwCode)
twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
                    addlwCode)
# SADDLV
# Note: SimdAddOp can be a bit optimistic here
addAcrossLongCode = "destElem += (BigElement)srcElem1;"
twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
                  2, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
                  4, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# SADDW, SADDW2
threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SCVTF (fixed-point)
# The %d left inside the snippet is filled in per call with the source
# integer width (32 or 64).
scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
                         " false, FPCRRounding(fpscr), fpscr)")
# Every call below registers one instruction variant.  The arguments are, in
# order: the instruction mnemonic, the generated class name, the op class,
# the element types to instantiate for and the C++ snippet applied to each
# element.  Variants built with hi=True are registered under the "2"-suffixed
# mnemonic, as the existing "shrn2" call below already does.
twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
                 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True, scalar=True)
# SCVTF (integer)
# Same conversion call as the fixed-point form, with 0 passed where that
# form passes imm.
scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
                       " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64)
twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64, scalar=True)
# SHADD
# The C++ "//" comments inside this snippet must stay on their own lines;
# otherwise they would comment out the destElem assignment that follows.
haddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
                   haddCode)
threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
                   haddCode)
# SHL
# A shift by the element width or more is split into two shifts so each
# individual shift amount stays below the element width.
shlCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    else
        destElem = srcElem1 << imm;
'''
twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
                 hasImm=True)
twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
                 hasImm=True)
# SHLL, SHLL2
shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
twoRegLongInstX("shll2", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
                hi=True)
# SHRN, SHRN2
shrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True)
twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True, hi=True)
# SHSUB
# As with SHADD, the "//" comments need their own lines.
hsubCode = '''
    Element borrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
                   hsubCode)
# SLI
# Shift left and insert: the low `imm` bits of the destination are kept.
# For a shift of the element width or more nothing is inserted, so destElem
# is simply left as it was (previously spelled as a self-assignment).
sliCode = '''
    if (imm < sizeof(Element) * 8)
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
                 True, hasImm=True)
twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
                 True, hasImm=True)
# SMAX
maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode)
threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode)
# SMAXP
threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode, pairwise=True)
threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode, pairwise=True)
# SMAXV
# The first element (i == 0) seeds the running maximum.
maxAcrossCode = '''
    if (i == 0 || srcElem1 > destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, maxAcrossCode)
twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
                  maxAcrossCode)
# SMIN
minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode)
threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode)
# SMINP
threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode, pairwise=True)
threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode, pairwise=True)
# SMINV
# The first element (i == 0) seeds the running minimum.
minAcrossCode = '''
    if (i == 0 || srcElem1 < destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, minAcrossCode)
twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
                  minAcrossCode)

split('exec')

# SMLAL, SMLAL2 (by element)
mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True)
threeRegLongInstX("smlal2", "SmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True,
                  hi=True)
# SMLAL, SMLAL2 (vector)
threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True)
threeRegLongInstX("smlal2", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True, hi=True)
# SMLSL, SMLSL2 (by element)
mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, byElem=True)
threeRegLongInstX("smlsl2", "SmlslElem2X", "SimdMultAccOp",
                  smallSignedTypes, mlslCode, True, byElem=True, hi=True)
# SMLSL, SMLSL2 (vector)
threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True)
threeRegLongInstX("smlsl2", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, hi=True)
# SMOV
insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
              'W', True)
insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
              True)
# SMULL, SMULL2 (by element)
mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True)
threeRegLongInstX("smull2", "SmullElem2X", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True, hi=True)
# SMULL, SMULL2 (vector)
threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
                  mullCode)
threeRegLongInstX("smull2", "Smull2X", "SimdMultOp", smallSignedTypes,
                  mullCode, hi=True)
# SQABS
# The most negative value has no positive counterpart: it produces
# ~srcElem1 and sets fpscr.qc.
sqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
                 sqabsCode)
twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
                 sqabsCode, scalar=True)
# SQADD
# Overflow is detected from the signs: both sources share a sign that the
# sum does not.  The result is then replaced by the extreme value of the
# sources' sign and fpscr.qc is set.
sqaddCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
                   sqaddCode)
threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
                   sqaddCode, scalar=True)
# SQDMLAL, SQDMLAL2 (by element)
# Two stages, each of which can set fpscr.qc: the doubled product is
# replaced by a fixed value for the listed maxNeg/halfNeg operand
# combinations, and the accumulation is replaced by an extreme value when
# its sign shows an overflow.
qdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
threeRegLongInstX("sqdmlal2", "SqdmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
                  scalar=True)
# SQDMLAL, SQDMLAL2 (vector)
threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True)
threeRegLongInstX("sqdmlal2", "Sqdmlal2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
# SQDMLSL, SQDMLSL2 (by element)
# Same structure as SQDMLAL, with the doubled product subtracted and the
# overflow test done on the sign of the negated product.
qdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
threeRegLongInstX("sqdmlsl2", "SqdmlslElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
                  scalar=True)
# SQDMLSL, SQDMLSL2 (vector)
threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True)
threeRegLongInstX("sqdmlsl2", "Sqdmlsl2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
# SQDMULH (by element)
# Upper half of the doubled product.  When both sources are the most
# negative value, the result is ~srcElem1 and fpscr.qc is set.
sqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
                   ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
                   ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp", 2142 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, 2143 scalar=True) 2144 # SQDMULH (vector) 2145 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", 2146 ("int16_t", "int32_t"), 2, sqdmulhCode) 2147 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", 2148 ("int16_t", "int32_t"), 4, sqdmulhCode) 2149 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", 2150 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) 2151 # SQDMULL, SQDMULL2 (by element) 2152 qdmullCode = ''' 2153 FPSCR fpscr = (FPSCR) FpscrQc; 2154 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); 2155 if (srcElem1 == srcElem2 && 2156 srcElem1 == (Element)((Element)1 << 2157 (Element)(sizeof(Element) * 8 - 1))) { 2158 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); 2159 fpscr.qc = 1; 2160 } 2161 FpscrQc = fpscr; 2162 ''' 2163 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", 2164 ("int16_t", "int32_t"), qdmullCode, True, byElem=True) 2165 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", 2166 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2167 hi=True) 2168 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", 2169 ("int16_t", "int32_t"), qdmullCode, True, byElem=True, 2170 scalar=True) 2171 # SQDMULL, SQDMULL2 (vector) 2172 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", 2173 ("int16_t", "int32_t"), qdmullCode, True) 2174 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", 2175 ("int16_t", "int32_t"), qdmullCode, True, hi=True) 2176 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", 2177 ("int16_t", "int32_t"), qdmullCode, True, scalar=True) 2178 # SQNEG 2179 sqnegCode = ''' 2180 FPSCR fpscr = (FPSCR) FpscrQc; 2181 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { 2182 fpscr.qc = 1; 2183 destElem = ~srcElem1; 2184 } else { 2185 destElem = -srcElem1; 2186 } 2187 FpscrQc = fpscr; 2188 ''' 2189 twoEqualRegInstX("sqneg", "SqnegDX", 
"SimdAluOp", smallSignedTypes, 2, 2190 sqnegCode) 2191 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, 2192 sqnegCode) 2193 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, 2194 sqnegCode, scalar=True) 2195 # SQRDMULH (by element) 2196 sqrdmulhCode = ''' 2197 FPSCR fpscr = (FPSCR) FpscrQc; 2198 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + 2199 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> 2200 (sizeof(Element) * 8); 2201 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); 2202 Element halfNeg = maxNeg / 2; 2203 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || 2204 (srcElem1 == halfNeg && srcElem2 == maxNeg) || 2205 (srcElem1 == maxNeg && srcElem2 == halfNeg)) { 2206 if (destElem < 0) { 2207 destElem = mask(sizeof(Element) * 8 - 1); 2208 } else { 2209 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2210 } 2211 fpscr.qc = 1; 2212 } 2213 FpscrQc = fpscr; 2214 ''' 2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", 2216 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) 2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", 2218 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) 2219 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", 2220 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, 2221 scalar=True) 2222 # SQRDMULH (vector) 2223 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", 2224 ("int16_t", "int32_t"), 2, sqrdmulhCode) 2225 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", 2226 ("int16_t", "int32_t"), 4, sqrdmulhCode) 2227 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", 2228 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) 2229 # SQRSHL 2230 sqrshlCode = ''' 2231 int16_t shiftAmt = (int8_t)srcElem2; 2232 FPSCR fpscr = (FPSCR) FpscrQc; 2233 if (shiftAmt < 0) { 2234 shiftAmt = -shiftAmt; 2235 Element rBit = 0; 2236 if (shiftAmt <= sizeof(Element) * 8) 2237 rBit = bits(srcElem1, shiftAmt - 1); 2238 if (shiftAmt > 
sizeof(Element) * 8 && srcElem1 < 0) 2239 rBit = 1; 2240 if (shiftAmt >= sizeof(Element) * 8) { 2241 shiftAmt = sizeof(Element) * 8 - 1; 2242 destElem = 0; 2243 } else { 2244 destElem = (srcElem1 >> shiftAmt); 2245 } 2246 // Make sure the right shift sign extended when it should. 2247 if (srcElem1 < 0 && destElem >= 0) { 2248 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2249 1 - shiftAmt)); 2250 } 2251 destElem += rBit; 2252 } else if (shiftAmt > 0) { 2253 bool sat = false; 2254 if (shiftAmt >= sizeof(Element) * 8) { 2255 if (srcElem1 != 0) 2256 sat = true; 2257 else 2258 destElem = 0; 2259 } else { 2260 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2261 sizeof(Element) * 8 - 1 - shiftAmt) != 2262 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { 2263 sat = true; 2264 } else { 2265 destElem = srcElem1 << shiftAmt; 2266 } 2267 } 2268 if (sat) { 2269 fpscr.qc = 1; 2270 destElem = mask(sizeof(Element) * 8 - 1); 2271 if (srcElem1 < 0) 2272 destElem = ~destElem; 2273 } 2274 } else { 2275 destElem = srcElem1; 2276 } 2277 FpscrQc = fpscr; 2278 ''' 2279 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, 2280 sqrshlCode) 2281 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, 2282 sqrshlCode) 2283 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, 2284 sqrshlCode, scalar=True) 2285 # SQRSHRN, SQRSHRN2 2286 sqrshrnCode = ''' 2287 FPSCR fpscr = (FPSCR) FpscrQc; 2288 if (imm > sizeof(srcElem1) * 8) { 2289 if (srcElem1 != 0 && srcElem1 != -1) 2290 fpscr.qc = 1; 2291 destElem = 0; 2292 } else if (imm) { 2293 BigElement mid = (srcElem1 >> (imm - 1)); 2294 uint64_t rBit = mid & 0x1; 2295 mid >>= 1; 2296 mid |= -(mid & ((BigElement)1 << 2297 (sizeof(BigElement) * 8 - 1 - imm))); 2298 mid += rBit; 2299 if (mid != (Element)mid) { 2300 destElem = mask(sizeof(Element) * 8 - 1); 2301 if (srcElem1 < 0) 2302 destElem = ~destElem; 2303 fpscr.qc = 1; 2304 } else { 2305 destElem = mid; 2306 } 2307 } else { 2308 if 
(srcElem1 != (Element)srcElem1) { 2309 destElem = mask(sizeof(Element) * 8 - 1); 2310 if (srcElem1 < 0) 2311 destElem = ~destElem; 2312 fpscr.qc = 1; 2313 } else { 2314 destElem = srcElem1; 2315 } 2316 } 2317 FpscrQc = fpscr; 2318 ''' 2319 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, 2320 sqrshrnCode, hasImm=True) 2321 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, 2322 sqrshrnCode, hasImm=True, hi=True) 2323 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, 2324 sqrshrnCode, hasImm=True, scalar=True) 2325 # SQRSHRUN, SQRSHRUN2 2326 sqrshrunCode = ''' 2327 FPSCR fpscr = (FPSCR) FpscrQc; 2328 if (imm > sizeof(srcElem1) * 8) { 2329 if (srcElem1 != 0) 2330 fpscr.qc = 1; 2331 destElem = 0; 2332 } else if (imm) { 2333 BigElement mid = (srcElem1 >> (imm - 1)); 2334 uint64_t rBit = mid & 0x1; 2335 mid >>= 1; 2336 mid |= -(mid & ((BigElement)1 << 2337 (sizeof(BigElement) * 8 - 1 - imm))); 2338 mid += rBit; 2339 if (bits(mid, sizeof(BigElement) * 8 - 1, 2340 sizeof(Element) * 8) != 0) { 2341 if (srcElem1 < 0) { 2342 destElem = 0; 2343 } else { 2344 destElem = mask(sizeof(Element) * 8); 2345 } 2346 fpscr.qc = 1; 2347 } else { 2348 destElem = mid; 2349 } 2350 } else { 2351 if (srcElem1 < 0) { 2352 fpscr.qc = 1; 2353 destElem = 0; 2354 } else { 2355 destElem = srcElem1; 2356 } 2357 } 2358 FpscrQc = fpscr; 2359 ''' 2360 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, 2361 sqrshrunCode, hasImm=True) 2362 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", 2363 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) 2364 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", 2365 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) 2366 # SQSHL (immediate) 2367 sqshlImmCode = ''' 2368 FPSCR fpscr = (FPSCR) FpscrQc; 2369 if (imm >= sizeof(Element) * 8) { 2370 if (srcElem1 != 0) { 2371 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2372 if (srcElem1 > 
0) 2373 destElem = ~destElem; 2374 fpscr.qc = 1; 2375 } else { 2376 destElem = 0; 2377 } 2378 } else if (imm) { 2379 destElem = (srcElem1 << imm); 2380 uint64_t topBits = bits((uint64_t)srcElem1, 2381 sizeof(Element) * 8 - 1, 2382 sizeof(Element) * 8 - 1 - imm); 2383 if (topBits != 0 && topBits != mask(imm + 1)) { 2384 destElem = (Element)1 << (sizeof(Element) * 8 - 1); 2385 if (srcElem1 > 0) 2386 destElem = ~destElem; 2387 fpscr.qc = 1; 2388 } 2389 } else { 2390 destElem = srcElem1; 2391 } 2392 FpscrQc = fpscr; 2393 ''' 2394 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, 2395 sqshlImmCode, hasImm=True) 2396 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, 2397 sqshlImmCode, hasImm=True) 2398 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, 2399 sqshlImmCode, hasImm=True, scalar=True) 2400 # SQSHL (register) 2401 sqshlCode = ''' 2402 int16_t shiftAmt = (int8_t)srcElem2; 2403 FPSCR fpscr = (FPSCR) FpscrQc; 2404 if (shiftAmt < 0) { 2405 shiftAmt = -shiftAmt; 2406 if (shiftAmt >= sizeof(Element) * 8) { 2407 shiftAmt = sizeof(Element) * 8 - 1; 2408 destElem = 0; 2409 } else { 2410 destElem = (srcElem1 >> shiftAmt); 2411 } 2412 // Make sure the right shift sign extended when it should. 2413 if (srcElem1 < 0 && destElem >= 0) { 2414 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2415 1 - shiftAmt)); 2416 } 2417 } else if (shiftAmt > 0) { 2418 bool sat = false; 2419 if (shiftAmt >= sizeof(Element) * 8) { 2420 if (srcElem1 != 0) 2421 sat = true; 2422 else 2423 destElem = 0; 2424 } else { 2425 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, 2426 sizeof(Element) * 8 - 1 - shiftAmt) != 2427 ((srcElem1 < 0) ? 
mask(shiftAmt + 1) : 0)) { 2428 sat = true; 2429 } else { 2430 destElem = srcElem1 << shiftAmt; 2431 } 2432 } 2433 if (sat) { 2434 fpscr.qc = 1; 2435 destElem = mask(sizeof(Element) * 8 - 1); 2436 if (srcElem1 < 0) 2437 destElem = ~destElem; 2438 } 2439 } else { 2440 destElem = srcElem1; 2441 } 2442 FpscrQc = fpscr; 2443 ''' 2444 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, 2445 sqshlCode) 2446 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, 2447 sqshlCode) 2448 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, 2449 sqshlCode, scalar=True) 2450 # SQSHLU 2451 sqshluCode = ''' 2452 FPSCR fpscr = (FPSCR) FpscrQc; 2453 if (imm >= sizeof(Element) * 8) { 2454 if (srcElem1 < 0) { 2455 destElem = 0; 2456 fpscr.qc = 1; 2457 } else if (srcElem1 > 0) { 2458 destElem = mask(sizeof(Element) * 8); 2459 fpscr.qc = 1; 2460 } else { 2461 destElem = 0; 2462 } 2463 } else if (imm) { 2464 destElem = (srcElem1 << imm); 2465 uint64_t topBits = bits((uint64_t)srcElem1, 2466 sizeof(Element) * 8 - 1, 2467 sizeof(Element) * 8 - imm); 2468 if (srcElem1 < 0) { 2469 destElem = 0; 2470 fpscr.qc = 1; 2471 } else if (topBits != 0) { 2472 destElem = mask(sizeof(Element) * 8); 2473 fpscr.qc = 1; 2474 } 2475 } else { 2476 if (srcElem1 < 0) { 2477 fpscr.qc = 1; 2478 destElem = 0; 2479 } else { 2480 destElem = srcElem1; 2481 } 2482 } 2483 FpscrQc = fpscr; 2484 ''' 2485 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, 2486 sqshluCode, hasImm=True) 2487 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, 2488 sqshluCode, hasImm=True) 2489 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, 2490 sqshluCode, hasImm=True, scalar=True) 2491 # SQSHRN, SQSHRN2 2492 sqshrnCode = ''' 2493 FPSCR fpscr = (FPSCR) FpscrQc; 2494 if (imm > sizeof(srcElem1) * 8) { 2495 if (srcElem1 != 0 && srcElem1 != -1) 2496 fpscr.qc = 1; 2497 destElem = 0; 2498 } else if (imm) { 2499 BigElement mid = 
((srcElem1 >> (imm - 1)) >> 1); 2500 mid |= -(mid & ((BigElement)1 << 2501 (sizeof(BigElement) * 8 - 1 - imm))); 2502 if (mid != (Element)mid) { 2503 destElem = mask(sizeof(Element) * 8 - 1); 2504 if (srcElem1 < 0) 2505 destElem = ~destElem; 2506 fpscr.qc = 1; 2507 } else { 2508 destElem = mid; 2509 } 2510 } else { 2511 destElem = srcElem1; 2512 } 2513 FpscrQc = fpscr; 2514 ''' 2515 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, 2516 sqshrnCode, hasImm=True) 2517 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, 2518 sqshrnCode, hasImm=True, hi=True) 2519 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, 2520 sqshrnCode, hasImm=True, scalar=True) 2521 # SQSHRUN, SQSHRUN2 2522 sqshrunCode = ''' 2523 FPSCR fpscr = (FPSCR) FpscrQc; 2524 if (imm > sizeof(srcElem1) * 8) { 2525 if (srcElem1 != 0) 2526 fpscr.qc = 1; 2527 destElem = 0; 2528 } else if (imm) { 2529 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 2530 if (bits(mid, sizeof(BigElement) * 8 - 1, 2531 sizeof(Element) * 8) != 0) { 2532 if (srcElem1 < 0) { 2533 destElem = 0; 2534 } else { 2535 destElem = mask(sizeof(Element) * 8); 2536 } 2537 fpscr.qc = 1; 2538 } else { 2539 destElem = mid; 2540 } 2541 } else { 2542 destElem = srcElem1; 2543 } 2544 FpscrQc = fpscr; 2545 ''' 2546 twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, 2547 sqshrunCode, hasImm=True) 2548 twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, 2549 sqshrunCode, hasImm=True, hi=True) 2550 twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, 2551 sqshrunCode, hasImm=True, scalar=True) 2552 # SQSUB 2553 sqsubCode = ''' 2554 destElem = srcElem1 - srcElem2; 2555 FPSCR fpscr = (FPSCR) FpscrQc; 2556 bool negDest = (destElem < 0); 2557 bool negSrc1 = (srcElem1 < 0); 2558 bool posSrc2 = (srcElem2 >= 0); 2559 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { 2560 destElem = (Element)1 << (sizeof(Element) * 8 - 
1); 2561 if (negDest) 2562 destElem -= 1; 2563 fpscr.qc = 1; 2564 } 2565 FpscrQc = fpscr; 2566 ''' 2567 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, 2568 sqsubCode) 2569 threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, 2570 sqsubCode) 2571 threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, 2572 sqsubCode, scalar=True) 2573 # SQXTN, SQXTN2 2574 sqxtnCode = ''' 2575 FPSCR fpscr = (FPSCR) FpscrQc; 2576 destElem = srcElem1; 2577 if ((BigElement)destElem != srcElem1) { 2578 fpscr.qc = 1; 2579 destElem = mask(sizeof(Element) * 8 - 1); 2580 if (srcElem1 < 0) 2581 destElem = ~destElem; 2582 } 2583 FpscrQc = fpscr; 2584 ''' 2585 twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, 2586 sqxtnCode) 2587 twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, 2588 sqxtnCode, hi=True) 2589 twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, 2590 sqxtnCode, scalar=True) 2591 # SQXTUN, SQXTUN2 2592 sqxtunCode = ''' 2593 FPSCR fpscr = (FPSCR) FpscrQc; 2594 destElem = srcElem1; 2595 if (srcElem1 < 0 || 2596 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { 2597 fpscr.qc = 1; 2598 destElem = mask(sizeof(Element) * 8); 2599 if (srcElem1 < 0) 2600 destElem = ~destElem; 2601 } 2602 FpscrQc = fpscr; 2603 ''' 2604 twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, 2605 sqxtunCode) 2606 twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, 2607 sqxtunCode, hi=True) 2608 twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, 2609 sqxtunCode, scalar=True) 2610 # SRHADD 2611 rhaddCode = ''' 2612 Element carryBit = 2613 (((unsigned)srcElem1 & 0x1) + 2614 ((unsigned)srcElem2 & 0x1) + 1) >> 1; 2615 // Use division instead of a shift to ensure the sign extension works 2616 // right. The compiler will figure out if it can be a shift. Mask the 2617 // inputs so they get truncated correctly. 
2618 destElem = (((srcElem1 & ~(Element)1) / 2) + 2619 ((srcElem2 & ~(Element)1) / 2)) + carryBit; 2620 ''' 2621 threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, 2622 rhaddCode) 2623 threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, 2624 rhaddCode) 2625 # SRI 2626 sriCode = ''' 2627 if (imm >= sizeof(Element) * 8) 2628 destElem = destElem; 2629 else 2630 destElem = (srcElem1 >> imm) | 2631 (destElem & ~mask(sizeof(Element) * 8 - imm)); 2632 ''' 2633 twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, 2634 True, hasImm=True) 2635 twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, 2636 True, hasImm=True) 2637 # SRSHL 2638 rshlCode = ''' 2639 int16_t shiftAmt = (int8_t)srcElem2; 2640 if (shiftAmt < 0) { 2641 shiftAmt = -shiftAmt; 2642 Element rBit = 0; 2643 if (shiftAmt <= sizeof(Element) * 8) 2644 rBit = bits(srcElem1, shiftAmt - 1); 2645 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) 2646 rBit = 1; 2647 if (shiftAmt >= sizeof(Element) * 8) { 2648 shiftAmt = sizeof(Element) * 8 - 1; 2649 destElem = 0; 2650 } else { 2651 destElem = (srcElem1 >> shiftAmt); 2652 } 2653 // Make sure the right shift sign extended when it should. 
2654 if (ltz(srcElem1) && !ltz(destElem)) { 2655 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2656 1 - shiftAmt)); 2657 } 2658 destElem += rBit; 2659 } else if (shiftAmt > 0) { 2660 if (shiftAmt >= sizeof(Element) * 8) { 2661 destElem = 0; 2662 } else { 2663 destElem = srcElem1 << shiftAmt; 2664 } 2665 } else { 2666 destElem = srcElem1; 2667 } 2668 ''' 2669 threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, 2670 rshlCode) 2671 threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, 2672 rshlCode) 2673 # SRSHR 2674 rshrCode = ''' 2675 if (imm > sizeof(srcElem1) * 8) { 2676 destElem = 0; 2677 } else if (imm) { 2678 Element rBit = bits(srcElem1, imm - 1); 2679 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2680 } else { 2681 destElem = srcElem1; 2682 } 2683 ''' 2684 twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, 2685 rshrCode, hasImm=True) 2686 twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, 2687 rshrCode, hasImm=True) 2688 # SRSRA 2689 rsraCode = ''' 2690 if (imm > sizeof(srcElem1) * 8) { 2691 destElem += 0; 2692 } else if (imm) { 2693 Element rBit = bits(srcElem1, imm - 1); 2694 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; 2695 } else { 2696 destElem += srcElem1; 2697 } 2698 ''' 2699 twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, 2700 rsraCode, True, hasImm=True) 2701 twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, 2702 rsraCode, True, hasImm=True) 2703 # SSHL 2704 shlCode = ''' 2705 int16_t shiftAmt = (int8_t)srcElem2; 2706 if (shiftAmt < 0) { 2707 shiftAmt = -shiftAmt; 2708 if (shiftAmt >= sizeof(Element) * 8) { 2709 shiftAmt = sizeof(Element) * 8 - 1; 2710 destElem = 0; 2711 } else { 2712 destElem = (srcElem1 >> shiftAmt); 2713 } 2714 // Make sure the right shift sign extended when it should. 
2715 if (ltz(srcElem1) && !ltz(destElem)) { 2716 destElem |= -((Element)1 << (sizeof(Element) * 8 - 2717 1 - shiftAmt)); 2718 } 2719 } else { 2720 if (shiftAmt >= sizeof(Element) * 8) { 2721 destElem = 0; 2722 } else { 2723 destElem = srcElem1 << shiftAmt; 2724 } 2725 } 2726 ''' 2727 threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, 2728 shlCode) 2729 threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, 2730 shlCode) 2731 # SSHLL, SSHLL2 2732 shllCode = ''' 2733 if (imm >= sizeof(destElem) * 8) { 2734 destElem = 0; 2735 } else { 2736 destElem = (BigElement)srcElem1 << imm; 2737 } 2738 ''' 2739 twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, 2740 shllCode, hasImm=True) 2741 twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, 2742 shllCode, hasImm=True, hi=True) 2743 # SSHR 2744 shrCode = ''' 2745 if (imm >= sizeof(srcElem1) * 8) { 2746 if (ltz(srcElem1)) 2747 destElem = -1; 2748 else 2749 destElem = 0; 2750 } else { 2751 destElem = srcElem1 >> imm; 2752 } 2753 ''' 2754 twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, 2755 hasImm=True) 2756 twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, 2757 hasImm=True) 2758 # SSRA 2759 sraCode = ''' 2760 Element mid;; 2761 if (imm >= sizeof(srcElem1) * 8) { 2762 mid = ltz(srcElem1) ? 
-1 : 0; 2763 } else { 2764 mid = srcElem1 >> imm; 2765 if (ltz(srcElem1) && !ltz(mid)) { 2766 mid |= -(mid & ((Element)1 << 2767 (sizeof(Element) * 8 - 1 - imm))); 2768 } 2769 } 2770 destElem += mid; 2771 ''' 2772 twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, 2773 True, hasImm=True) 2774 twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, 2775 True, hasImm=True) 2776 # SSUBL 2777 sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" 2778 threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, 2779 sublwCode) 2780 threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, 2781 sublwCode, hi=True) 2782 # SSUBW 2783 threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, 2784 sublwCode) 2785 threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, 2786 sublwCode, hi=True) 2787 # SUB 2788 subCode = "destElem = srcElem1 - srcElem2;" 2789 threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) 2790 threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) 2791 # SUBHN, SUBHN2 2792 subhnCode = ''' 2793 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> 2794 (sizeof(Element) * 8); 2795 ''' 2796 threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, 2797 subhnCode) 2798 threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, 2799 subhnCode, hi=True) 2800 # SUQADD 2801 suqaddCode = ''' 2802 FPSCR fpscr = (FPSCR) FpscrQc; 2803 Element tmp = destElem + srcElem1; 2804 if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { 2805 if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || 2806 tmp < srcElem1 || tmp < destElem) { 2807 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2808 fpscr.qc = 1; 2809 } else { 2810 destElem = tmp; 2811 } 2812 } else { 2813 Element absDestElem = (~destElem) + 1; 2814 if (absDestElem < srcElem1) { 2815 // Still check for positive sat., no need to check for 
negative sat. 2816 if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { 2817 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; 2818 fpscr.qc = 1; 2819 } else { 2820 destElem = tmp; 2821 } 2822 } else { 2823 destElem = tmp; 2824 } 2825 } 2826 FpscrQc = fpscr; 2827 ''' 2828 twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2829 suqaddCode, True) 2830 twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, 2831 suqaddCode, True) 2832 twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, 2833 suqaddCode, True, scalar=True) 2834 # SXTL -> alias to SSHLL 2835 # TBL 2836 tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) 2837 tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) 2838 tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) 2839 tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) 2840 tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) 2841 tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) 2842 tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) 2843 tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) 2844 # TBX 2845 tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) 2846 tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) 2847 tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) 2848 tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) 2849 tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) 2850 tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) 2851 tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) 2852 tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) 2853 # TRN1 2854 trnCode = ''' 2855 unsigned part = %s; 2856 for (unsigned i = 0; i < eCount / 2; i++) { 2857 destReg.elements[2 * i] = 
srcReg1.elements[2 * i + part]; 2858 destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; 2859 } 2860 ''' 2861 threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, 2862 trnCode % "0") 2863 threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, 2864 trnCode % "0") 2865 # TRN2 2866 threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, 2867 trnCode % "1") 2868 threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, 2869 trnCode % "1") 2870 # UABA 2871 threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, 2872 abaCode, True) 2873 threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, 2874 abaCode, True) 2875 # UABAL, UABAL2 2876 threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, 2877 abalCode, True) 2878 threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, 2879 abalCode, True, hi=True) 2880 # UABD 2881 threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, 2882 abdCode) 2883 threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, 2884 abdCode) 2885 # UABDL, UABDL2 2886 threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, 2887 abdlCode, True) 2888 threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, 2889 abdlCode, True, hi=True) 2890 # UADALP 2891 twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, 2892 2, adalpCode, True) 2893 twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, 2894 4, adalpCode, True) 2895 # UADDL, UADDL2 2896 threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, 2897 addlwCode) 2898 threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, 2899 addlwCode, hi=True) 2900 # UADDLP 2901 twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, 2902 2, addlwCode) 2903 twoRegCondenseInstX("uaddlp", "UaddlpQX", 
"SimdAddOp", smallUnsignedTypes, 2904 4, addlwCode) 2905 # UADDLV 2906 twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", 2907 ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) 2908 twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", 2909 ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True) 2910 twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, 2911 addAcrossLongCode, doubleDest=True, long=True) 2912 # UADDW 2913 threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, 2914 addlwCode) 2915 threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, 2916 addlwCode, hi=True) 2917 # UCVTF (fixed-point) 2918 ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," 2919 " FPCRRounding(fpscr), fpscr)") 2920 twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, 2921 ucvtfFixedCode, hasImm=True) 2922 twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, 2923 ucvtfFixedCode, hasImm=True) 2924 twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, 2925 ucvtfFixedCode, hasImm=True, scalar=True) 2926 # UCVTF (integer) 2927 ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," 2928 " FPCRRounding(fpscr), fpscr)") 2929 twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, 2930 ucvtfIntCode) 2931 twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, 2932 ucvtfIntCode) 2933 twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, 2934 ucvtfIntCode, scalar=True) 2935 # UHADD 2936 threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, 2937 haddCode) 2938 threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, 2939 haddCode) 2940 # UHSUB 2941 threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, 2942 hsubCode) 2943 threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, 2944 hsubCode) 2945 # UMAX 2946 
threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, 2947 maxCode) 2948 threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, 2949 maxCode) 2950 # UMAXP 2951 threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2952 maxCode, pairwise=True) 2953 threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2954 maxCode, pairwise=True) 2955 # UMAXV 2956 twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2957 2, maxAcrossCode) 2958 twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2959 maxAcrossCode) 2960 # UMIN 2961 threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, 2962 minCode) 2963 threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, 2964 minCode) 2965 # UMINP 2966 threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, 2967 minCode, pairwise=True) 2968 threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, 2969 minCode, pairwise=True) 2970 # UMINV 2971 twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), 2972 2, minAcrossCode) 2973 twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, 2974 minAcrossCode) 2975 # UMLAL (by element) 2976 threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", 2977 smallUnsignedTypes, mlalCode, True, byElem=True) 2978 threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", 2979 smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) 2980 # UMLAL (vector) 2981 threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, 2982 mlalCode, True) 2983 threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, 2984 mlalCode, True, hi=True) 2985 # UMLSL (by element) 2986 threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", 2987 smallUnsignedTypes, mlslCode, True, byElem=True) 2988 threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", 2989 
smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) 2990 # UMLSL (vector) 2991 threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, 2992 mlslCode, True) 2993 threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, 2994 mlslCode, True, hi=True) 2995 # UMOV 2996 insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') 2997 insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') 2998 # UMULL, UMULL2 (by element) 2999 threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, 3000 mullCode, byElem=True) 3001 threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, 3002 mullCode, byElem=True, hi=True) 3003 # UMULL, UMULL2 (vector) 3004 threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, 3005 mullCode) 3006 threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, 3007 mullCode, hi=True) 3008 # UQADD 3009 uqaddCode = ''' 3010 destElem = srcElem1 + srcElem2; 3011 FPSCR fpscr = (FPSCR) FpscrQc; 3012 if (destElem < srcElem1 || destElem < srcElem2) { 3013 destElem = (Element)(-1); 3014 fpscr.qc = 1; 3015 } 3016 FpscrQc = fpscr; 3017 ''' 3018 threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3019 uqaddCode) 3020 threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, 3021 uqaddCode) 3022 threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, 3023 uqaddCode, scalar=True) 3024 # UQRSHL 3025 uqrshlCode = ''' 3026 int16_t shiftAmt = (int8_t)srcElem2; 3027 FPSCR fpscr = (FPSCR) FpscrQc; 3028 if (shiftAmt < 0) { 3029 shiftAmt = -shiftAmt; 3030 Element rBit = 0; 3031 if (shiftAmt <= sizeof(Element) * 8) 3032 rBit = bits(srcElem1, shiftAmt - 1); 3033 if (shiftAmt >= sizeof(Element) * 8) { 3034 shiftAmt = sizeof(Element) * 8 - 1; 3035 destElem = 0; 3036 } else { 3037 destElem = (srcElem1 >> shiftAmt); 3038 } 3039 destElem += rBit; 3040 } else { 3041 if (shiftAmt >= sizeof(Element) * 8) { 
3042 if (srcElem1 != 0) { 3043 destElem = mask(sizeof(Element) * 8); 3044 fpscr.qc = 1; 3045 } else { 3046 destElem = 0; 3047 } 3048 } else { 3049 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3050 sizeof(Element) * 8 - shiftAmt)) { 3051 destElem = mask(sizeof(Element) * 8); 3052 fpscr.qc = 1; 3053 } else { 3054 destElem = srcElem1 << shiftAmt; 3055 } 3056 } 3057 } 3058 FpscrQc = fpscr; 3059 ''' 3060 threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, 3061 2, uqrshlCode) 3062 threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, 3063 uqrshlCode) 3064 threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, 3065 uqrshlCode, scalar=True) 3066 # UQRSHRN 3067 uqrshrnCode = ''' 3068 FPSCR fpscr = (FPSCR) FpscrQc; 3069 if (imm > sizeof(srcElem1) * 8) { 3070 if (srcElem1 != 0) 3071 fpscr.qc = 1; 3072 destElem = 0; 3073 } else if (imm) { 3074 BigElement mid = (srcElem1 >> (imm - 1)); 3075 uint64_t rBit = mid & 0x1; 3076 mid >>= 1; 3077 mid += rBit; 3078 if (mid != (Element)mid) { 3079 destElem = mask(sizeof(Element) * 8); 3080 fpscr.qc = 1; 3081 } else { 3082 destElem = mid; 3083 } 3084 } else { 3085 if (srcElem1 != (Element)srcElem1) { 3086 destElem = mask(sizeof(Element) * 8 - 1); 3087 fpscr.qc = 1; 3088 } else { 3089 destElem = srcElem1; 3090 } 3091 } 3092 FpscrQc = fpscr; 3093 ''' 3094 twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, 3095 uqrshrnCode, hasImm=True) 3096 twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", 3097 smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) 3098 twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", 3099 smallUnsignedTypes, uqrshrnCode, hasImm=True, 3100 scalar=True) 3101 # UQSHL (immediate) 3102 uqshlImmCode = ''' 3103 FPSCR fpscr = (FPSCR) FpscrQc; 3104 if (imm >= sizeof(Element) * 8) { 3105 if (srcElem1 != 0) { 3106 destElem = mask(sizeof(Element) * 8); 3107 fpscr.qc = 1; 3108 } else { 3109 destElem = 0; 3110 } 3111 } else if (imm) { 3112 
destElem = (srcElem1 << imm); 3113 uint64_t topBits = bits((uint64_t)srcElem1, 3114 sizeof(Element) * 8 - 1, 3115 sizeof(Element) * 8 - imm); 3116 if (topBits != 0) { 3117 destElem = mask(sizeof(Element) * 8); 3118 fpscr.qc = 1; 3119 } 3120 } else { 3121 destElem = srcElem1; 3122 } 3123 FpscrQc = fpscr; 3124 ''' 3125 twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, 3126 uqshlImmCode, hasImm=True) 3127 twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, 3128 uqshlImmCode, hasImm=True) 3129 twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, 3130 uqshlImmCode, hasImm=True, scalar=True) 3131 # UQSHL (register) 3132 uqshlCode = ''' 3133 int16_t shiftAmt = (int8_t)srcElem2; 3134 FPSCR fpscr = (FPSCR) FpscrQc; 3135 if (shiftAmt < 0) { 3136 shiftAmt = -shiftAmt; 3137 if (shiftAmt >= sizeof(Element) * 8) { 3138 shiftAmt = sizeof(Element) * 8 - 1; 3139 destElem = 0; 3140 } else { 3141 destElem = (srcElem1 >> shiftAmt); 3142 } 3143 } else if (shiftAmt > 0) { 3144 if (shiftAmt >= sizeof(Element) * 8) { 3145 if (srcElem1 != 0) { 3146 destElem = mask(sizeof(Element) * 8); 3147 fpscr.qc = 1; 3148 } else { 3149 destElem = 0; 3150 } 3151 } else { 3152 if (bits(srcElem1, sizeof(Element) * 8 - 1, 3153 sizeof(Element) * 8 - shiftAmt)) { 3154 destElem = mask(sizeof(Element) * 8); 3155 fpscr.qc = 1; 3156 } else { 3157 destElem = srcElem1 << shiftAmt; 3158 } 3159 } 3160 } else { 3161 destElem = srcElem1; 3162 } 3163 FpscrQc = fpscr; 3164 ''' 3165 threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, 3166 uqshlCode) 3167 threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, 3168 uqshlCode) 3169 threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, 3170 uqshlCode, scalar=True) 3171 # UQSHRN, UQSHRN2 3172 uqshrnCode = ''' 3173 FPSCR fpscr = (FPSCR) FpscrQc; 3174 if (imm > sizeof(srcElem1) * 8) { 3175 if (srcElem1 != 0) 3176 fpscr.qc = 1; 3177 destElem = 0; 3178 } else if 
(imm) { 3179 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); 3180 if (mid != (Element)mid) { 3181 destElem = mask(sizeof(Element) * 8); 3182 fpscr.qc = 1; 3183 } else { 3184 destElem = mid; 3185 } 3186 } else { 3187 destElem = srcElem1; 3188 } 3189 FpscrQc = fpscr; 3190 ''' 3191 twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, 3192 uqshrnCode, hasImm=True) 3193 twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, 3194 uqshrnCode, hasImm=True, hi=True) 3195 twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, 3196 uqshrnCode, hasImm=True, scalar=True) 3197 # UQSUB 3198 uqsubCode = ''' 3199 destElem = srcElem1 - srcElem2; 3200 FPSCR fpscr = (FPSCR) FpscrQc; 3201 if (destElem > srcElem1) { 3202 destElem = 0; 3203 fpscr.qc = 1; 3204 } 3205 FpscrQc = fpscr; 3206 ''' 3207 threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, 3208 uqsubCode) 3209 threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, 3210 uqsubCode) 3211 threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, 3212 uqsubCode, scalar=True) 3213 # UQXTN 3214 uqxtnCode = ''' 3215 FPSCR fpscr = (FPSCR) FpscrQc; 3216 destElem = srcElem1; 3217 if ((BigElement)destElem != srcElem1) { 3218 fpscr.qc = 1; 3219 destElem = mask(sizeof(Element) * 8); 3220 } 3221 FpscrQc = fpscr; 3222 ''' 3223 twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, 3224 uqxtnCode) 3225 twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, 3226 uqxtnCode, hi=True) 3227 twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, 3228 uqxtnCode, scalar=True) 3229 # URECPE 3230 urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" 3231 twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, 3232 urecpeCode) 3233 twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, 3234 urecpeCode) 3235 # URHADD 3236 threeEqualRegInstX("urhadd", 
"UrhaddDX", "SimdAddOp", smallUnsignedTypes, 3237 2, rhaddCode) 3238 threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, 3239 4, rhaddCode) 3240 # URSHL 3241 threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, 3242 rshlCode) 3243 threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, 3244 rshlCode) 3245 # URSHR 3246 twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, 3247 rshrCode, hasImm=True) 3248 twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, 3249 rshrCode, hasImm=True) 3250 # URSQRTE 3251 ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" 3252 twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, 3253 ursqrteCode) 3254 twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, 3255 ursqrteCode) 3256 # URSRA 3257 twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, 3258 rsraCode, True, hasImm=True) 3259 twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, 3260 rsraCode, True, hasImm=True) 3261 # USHL 3262 threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, 3263 shlCode) 3264 threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, 3265 shlCode) 3266 # USHLL, USHLL2 3267 twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, 3268 shllCode, hasImm=True) 3269 twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, 3270 shllCode, hi=True, hasImm=True) 3271 # USHR 3272 twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, 3273 shrCode, hasImm=True) 3274 twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, 3275 shrCode, hasImm=True) 3276 # USQADD 3277 usqaddCode = ''' 3278 FPSCR fpscr = (FPSCR) FpscrQc; 3279 Element tmp = destElem + srcElem1; 3280 if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { 3281 if (tmp < srcElem1 || tmp < destElem) { 3282 destElem = (Element)(-1); 3283 fpscr.qc = 1; 3284 } else { 
3285 destElem = tmp; 3286 } 3287 } else { 3288 Element absSrcElem1 = (~srcElem1) + 1; 3289 if (absSrcElem1 > destElem) { 3290 destElem = 0; 3291 fpscr.qc = 1; 3292 } else { 3293 destElem = tmp; 3294 } 3295 } 3296 FpscrQc = fpscr; 3297 ''' 3298 twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, 3299 usqaddCode, True) 3300 twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, 3301 usqaddCode, True) 3302 twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, 3303 usqaddCode, True, scalar=True) 3304 # USRA 3305 twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, 3306 sraCode, True, hasImm=True) 3307 twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, 3308 sraCode, True, hasImm=True) 3309 # USUBL 3310 threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, 3311 sublwCode) 3312 threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, 3313 sublwCode, hi=True) 3314 # USUBW 3315 threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, 3316 sublwCode) 3317 threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, 3318 sublwCode, hi=True) 3319 # UXTL -> alias to USHLL 3320 # UZP1 3321 uzpCode = ''' 3322 unsigned part = %s; 3323 for (unsigned i = 0; i < eCount / 2; i++) { 3324 destReg.elements[i] = srcReg1.elements[2 * i + part]; 3325 destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; 3326 } 3327 ''' 3328 threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, 3329 uzpCode % "0") 3330 threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, 3331 uzpCode % "0") 3332 # UZP2 3333 threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, 3334 uzpCode % "1") 3335 threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, 3336 uzpCode % "1") 3337 # XTN, XTN2 3338 xtnCode = "destElem = srcElem1;" 3339 twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", 
smallUnsignedTypes, xtnCode) 3340 twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, 3341 xtnCode, hi=True) 3342 # ZIP1 3343 zipCode = ''' 3344 unsigned base = %s; 3345 for (unsigned i = 0; i < eCount / 2; i++) { 3346 destReg.elements[2 * i] = srcReg1.elements[base + i]; 3347 destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; 3348 } 3349 ''' 3350 threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, 3351 zipCode % "0") 3352 threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, 3353 zipCode % "0") 3354 # ZIP2 3355 threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, 3356 zipCode % "eCount / 2") 3357 threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, 3358 zipCode % "eCount / 2") 3359 3360 for decoderFlavour, type_dict in decoders.iteritems(): 3361 header_output += ''' 3362 class %(decoder_flavour)sDecoder { 3363 public: 3364 ''' % { "decoder_flavour" : decoderFlavour } 3365 for type,name in type_dict.iteritems(): 3366 header_output += ''' 3367 template<typename Elem> using %(type)s = %(new_name)s<Elem>;''' % { 3368 "type" : type, "new_name" : name 3369 } 3370 header_output += ''' 3371 };''' 3372}}; 3373