neon64.isa (11165:d90aec9435bd) neon64.isa (12038:619bc4100aa8)
1// -*- mode: c++ -*-
2
3// Copyright (c) 2012-2013, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Giacomo Gabrielli
39// Mbou Eyole
40
41let {{
42
# Accumulators for generated text.  The generator helpers defined in
# this block append instruction class declarations to header_output and
# execute-method code to exec_output.
header_output = ""
exec_output = ""
# Decoder table; only the 'Generic' entry is created here.
decoders = { 'Generic' : {} }

# FP types (FP operations always work with unsigned representations)
floatTypes = ("uint32_t", "uint64_t")
smallFloatTypes = ("uint32_t",)
50
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
                       readDest=False, pairwise=False, scalar=False,
                       byElem=False, decoder='Generic'):
    """Generate a two-source SIMD instruction with equal-width operands.

    Builds the C++ body that walks the vector elements, wraps it in an
    InstObjParams, then appends the class declaration to header_output
    and the execute code (plus one NeonXExecDeclare instantiation per
    entry of `types`) to exec_output.

    rCount   -- number of register pieces (AA64Fp*P<n>) moved per
                operand; destination pieces rCount..3 are written as 0.
    op       -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest -- also load the destination and seed destElem from it.
    pairwise -- combine adjacent element pairs taken from srcReg1 and
                then srcReg2; cannot be mixed with byElem or scalar.
    scalar   -- only element 0 is computed, the others are set to 0.
    byElem   -- load all four pieces of the 2nd operand and index it
                with `imm` instead of the loop counter.
    decoder  -- accepted but not used by this function.
    """
    global header_output, exec_output, decoders
    assert not (pairwise and (byElem or scalar))

    # Vector declarations; a by-element 2nd operand needs the full vector.
    body = simd64EnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    body += '''
    FullRegVect srcReg2;
    ''' if byElem else '''
    RegVect srcReg2;
    '''

    # Load the source (and optionally destination) register pieces.
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % pieceArgs
        if readDest:
            body += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % pieceArgs
    if byElem:
        # Remaining pieces of the fully-read 2nd operand.
        for piece in range(rCount, 4):
            body += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : piece }

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    if pairwise:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
        loopArgs = { "op" : op, "readDest" : seedDest }
    else:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
        skipNonZero = '''
    if (i != 0) {
        destReg.elements[i] = 0;
        continue;
    }
    ''' if scalar else ""
        loopArgs = { "op" : op, "readDest" : seedDest,
                     "scalarCheck" : skipNonZero,
                     "src2Index" : "imm" if byElem else "i" }
    body += loopTemplate % loopArgs

    # Write back the result pieces, then zero the upper half (if any).
    for piece in range(rCount):
        body += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece }
    for piece in range(rCount, 4):
        body += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : piece }

    baseClass = "DataX2RegImmOp" if byElem else "DataX2RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    declTemplate = NeonX2RegImmOpDeclare if byElem else NeonX2RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
144
def threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1, bigSrc2, bigDest, readDest, scalar=False,
                         byElem=False, hi=False):
    """Generate a two-source SIMD instruction whose operands may differ
    in element width.

    bigSrc1/bigSrc2/bigDest select, per operand, the 4-piece "Big"
    vector type instead of the 2-piece default.  The generated C++ is
    wrapped in an InstObjParams (r_count fixed at 2) and appended to
    header_output / exec_output, with one NeonXExecDeclare instantiation
    per entry of `types`.

    op       -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest -- load the destination first and seed destElem from it.
    scalar   -- only element 0 is computed, the others are set to 0;
                cannot be combined with hi.
    byElem   -- 2nd operand is read fully (4 pieces) and indexed by `imm`.
    hi       -- non-big operands start at register piece 2 instead of 0,
                and the upper destination pieces are not zeroed.
    """
    assert not (scalar and hi)
    global header_output, exec_output
    # Default: every operand spans 2 pieces and uses the plain types.
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    if byElem:
        # By-element forms declare the 2nd operand as a FullRegVect.
        src2Prefix = 'Full'
    eWalkCode = simd64EnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    # srcReg1 / srcReg2 / destReg below are Python counters naming the
    # architectural register piece; `reg` indexes the local C++ vector.
    srcReg1 = 0
    if hi and not bigSrc1:  # long/widening operations
        srcReg1 = 2
    for reg in range(src1Cnt):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw);
    ''' % { "reg" : reg, "srcReg1" : srcReg1 }
        srcReg1 += 1
    srcReg2 = 0
    if (not byElem) and (hi and not bigSrc2):  # long/widening operations
        srcReg2 = 2
    for reg in range(src2Cnt):
        eWalkCode += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw);
    ''' % { "reg" : reg, "srcReg2" : srcReg2 }
        srcReg2 += 1
    if byElem:
        # 2nd operand has to be read fully
        for reg in range(src2Cnt, 4):
            eWalkCode += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Inserted at the top of the loop body for scalar forms only.
    scalarCheck = '''
    if (i != 0) {
        destReg.elements[i] = 0;
        continue;
    }
    '''
    # NOTE(review): srcElem2 is declared with src1Prefix, so the
    # "src2Prefix" key passed below is not referenced by this template.
    # Confirm this is intended before changing it.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix,
            "scalarCheck" : scalarCheck if scalar else "",
            "src2Index" : "imm" if byElem else "i" }
    destReg = 0
    if hi and not bigDest:
        # narrowing operations
        destReg = 2
    for reg in range(destCnt):
        eWalkCode += '''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg, "destReg": destReg }
        destReg += 1
    if destCnt < 4 and not hi:  # zero upper half
        for reg in range(destCnt, 4):
            eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX2RegImmOp" if byElem else "DataX2RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if byElem:
        header_output += NeonX2RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
247
def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                        scalar=False, byElem=False, hi=False):
    """Narrowing form: both sources are big, the destination is not.

    By-element addressing is rejected for this form.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=True, bigDest=False,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
253
def threeRegLongInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Long form: neither source is big, the destination is big."""
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=False, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
258
def threeRegWideInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Wide form: first source and destination are big, second is not.

    By-element addressing is rejected for this form.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar,
                         byElem=byElem, hi=hi)
264
def twoEqualRegInstX(name, Name, opClass, types, rCount, op,
                     readDest=False, scalar=False, byElem=False,
                     hasImm=False, isDup=False):
    """Generate a one-source SIMD instruction with equal-width operands.

    rCount   -- number of register pieces moved; destination pieces
                rCount..3 are written as 0.
    op       -- C++ snippet computing destElem from srcElem1 (it may
                also use the loop-local index `j`, which selects the
                destination element and starts out equal to `i`).
    readDest -- load the destination first and seed destElem from it.
    scalar   -- only element 0 is computed, the others are set to 0.
    byElem   -- index the source with `imm`; implies hasImm.
    hasImm   -- use the immediate-carrying base class and declaration.
    isDup    -- read the source as a full 4-piece vector; requires
                byElem.
    """
    global header_output, exec_output
    assert byElem or not isDup
    hasImm = True if byElem else hasImm

    vectorDecls = '''
    FullRegVect srcReg1;
    RegVect destReg;
    ''' if isDup else '''
    RegVect srcReg1, destReg;
    '''
    body = simd64EnabledCheckCode + vectorDecls

    srcPieces = 4 if isDup else rCount
    for piece in range(srcPieces):
        pieceArgs = { "reg" : piece }
        body += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % pieceArgs
        if readDest:
            body += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % pieceArgs

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    skipNonZero = '''
    if (i != 0) {
        destReg.elements[i] = 0;
        continue;
    }
    ''' if scalar else ""
    body += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest,
            "scalarCheck" : skipNonZero,
            "src1Index" : "imm" if byElem else "i" }

    # Write back the result pieces, then zero the upper half (if any).
    for piece in range(rCount):
        body += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece }
    for piece in range(rCount, 4):
        body += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : piece }

    baseClass = "DataX1RegImmOp" if hasImm else "DataX1RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    declTemplate = NeonX1RegImmOpDeclare if hasImm else NeonX1RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
334
def twoRegLongInstX(name, Name, opClass, types, op, readDest=False,
                    hi=False, hasImm=False):
    """Generate a one-source SIMD instruction with a big destination.

    The source is a 2-piece RegVect and the destination a 4-piece
    BigRegVect; all four destination pieces are written back.

    op       -- C++ snippet computing the BigElement destElem from
                srcElem1.
    readDest -- load the destination first and seed destElem from it.
    hi       -- take the source from register pieces 2-3 instead of 0-1.
    hasImm   -- use the immediate-carrying base class and declaration.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    # First architectural piece of the source operand.
    srcBase = 2 if hi else 0
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcPiece)d_uw);
    ''' % { "reg" : reg, "srcPiece" : srcBase + reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    # Seed the per-element accumulator.  This must assign destElem;
    # assigning an element to the destReg vector itself is not valid.
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if hasImm:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
385
def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, hi=False, hasImm=False):
    """Generate a one-source SIMD instruction with a big source.

    The source is a 4-piece BigRegVect and the destination a 2-piece
    RegVect.

    op       -- C++ snippet computing destElem from the BigElement
                srcElem1.
    readDest -- load the destination first and seed destElem from it;
                otherwise destReg.elements[0] is cleared up front.
    scalar   -- only element 0 is computed, the others are set to 0.
    hi       -- write the result to register pieces 2-3 and leave
                pieces 0-1 untouched; otherwise write pieces 0-1 and
                zero pieces 2-3.
    hasImm   -- use the immediate-carrying base class and declaration.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for reg in range(4):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(2):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    else:
        # Plain literal: there is nothing to substitute here, so no
        # %-formatting (and no dependence on a leftover loop variable).
        eWalkCode += '''
    destReg.elements[0] = 0;
    '''
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Inserted at the top of the loop body for scalar forms only.
    scalarCheck = '''
    if (i != 0) {
        destReg.elements[i] = 0;
        continue;
    }
    '''
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "scalarCheck" : scalarCheck if scalar else "" }
    # First architectural piece receiving the narrowed result.
    destBase = 2 if hi else 0
    for reg in range(2):
        eWalkCode += '''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg, "destReg" : destBase + reg }
    if not hi:
        for reg in range(2, 4):  # zero upper half
            eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "op_class": opClass }, [])
    if hasImm:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
451
def threeRegScrambleInstX(name, Name, opClass, types, rCount, op):
    """Generate a two-source SIMD instruction whose whole body is `op`.

    Unlike the element-walk generators, `op` is inserted verbatim
    between the register loads and the write-back, so it is responsible
    for filling destReg from srcReg1/srcReg2 itself.  Destination
    pieces rCount..3 are written as 0.
    """
    global header_output, exec_output
    loadTemplate = '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    '''
    storeTemplate = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    zeroTemplate = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    fragments = [simd64EnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    fragments.extend(loadTemplate % { "reg" : piece }
                     for piece in range(rCount))
    fragments.append(op)
    fragments.extend(storeTemplate % { "reg" : piece }
                     for piece in range(rCount))
    # Empty when rCount is already 4.
    fragments.extend(zeroTemplate % { "reg" : piece }
                     for piece in range(rCount, 4))
    body = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "DataX2RegOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
483
def insFromVecElemInstX(name, Name, opClass, types, rCount):
    """Generate an instruction copying one vector element into another.

    The source is read as a full 4-piece vector and element `imm2` of
    it is stored into element `imm1` of the destination.  The
    destination's rCount pieces are read first and written back
    afterwards; no other destination pieces are written.
    """
    global header_output, exec_output
    body = simd64EnabledCheckCode + '''
    FullRegVect srcReg1;
    RegVect destReg;
    '''
    body += "".join('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : piece } for piece in range(4))
    body += "".join('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : piece } for piece in range(rCount))
    body += '''
    Element srcElem1 = gtoh(srcReg1.elements[imm2]);
    Element destElem = srcElem1;
    destReg.elements[imm1] = htog(destElem);
    '''
    body += "".join('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece } for piece in range(rCount))

    iop = InstObjParams(name, Name,
                        "DataX1Reg2ImmOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1Reg2ImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
518
def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op):
    """Generate a scalar pairwise instruction.

    `op` combines source elements 0 and 1 (srcElem1, srcElem2) into
    destElem, which is stored in destination element 0.  Only the first
    rCount // 2 destination pieces are written from destReg; every
    remaining piece up to 3 is written as 0.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += '''
    Element srcElem1 = gtoh(srcReg1.elements[0]);
    Element srcElem2 = gtoh(srcReg1.elements[1]);
    Element destElem;
    %(op)s
    destReg.elements[0] = htog(destElem);
    ''' % { "op" : op }
    # Floor division keeps destCnt an int under both Python 2 and 3;
    # range() rejects the float that true division would produce.
    destCnt = rCount // 2
    for reg in range(destCnt):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    for reg in range(destCnt, 4):  # zero upper half
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
555
def twoRegAcrossInstX(name, Name, opClass, types, rCount, op,
                      doubleDest=False, long=False):
    """Generate an across-lanes reduction instruction.

    destElem starts as source element 0 and `op` is applied once for
    each further element; the result lands in destination element 0.

    doubleDest -- write back two destination pieces instead of one;
                  the remaining pieces up to 3 are written as 0.
    long       -- use the Big destination vector/element types and the
                  unequal-width execute template.
    """
    global header_output, exec_output
    if long:
        destPrefix = "Big"
    else:
        destPrefix = ""
    body = simd64EnabledCheckCode + '''
    RegVect srcReg1;
    %sRegVect destReg;
    ''' % destPrefix
    for piece in range(rCount):
        body += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : piece }
    body += '''
    destReg.regs[0] = 0;
    %(destPrefix)sElement destElem = 0;
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        if (i == 0) {
            destElem = srcElem1;
        } else {
            %(op)s
        }
    }
    destReg.elements[0] = htog(destElem);
    ''' % { "op" : op, "destPrefix" : destPrefix }

    writtenPieces = 2 if doubleDest else 1
    for piece in range(4):
        if piece < writtenPieces:
            body += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece }
        else:
            # zero upper half
            body += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : piece }

    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    execTemplate = (NeonXUnequalRegOpExecute if long
                    else NeonXEqualRegOpExecute)
    exec_output += execTemplate.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
604
def twoRegCondenseInstX(name, Name, opClass, types, rCount, op,
                        readDest=False):
    """Generate an instruction folding adjacent element pairs.

    For each i below eCount / 2, `op` combines source elements 2*i and
    2*i + 1 (srcElem1, srcElem2) into a BigElement destElem stored at
    destination element i.  Destination pieces rCount..3 are written
    as 0.

    readDest -- load the destination first and seed destElem from it.
    """
    global header_output, exec_output
    body = simd64EnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for piece in range(rCount):
        pieceArgs = { "reg" : piece }
        body += '''
    srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % pieceArgs
        if readDest:
            body += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % pieceArgs

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    body += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest }

    # Write back the result pieces, then zero the upper half (if any).
    for piece in range(rCount):
        body += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece }
    for piece in range(rCount, 4):
        body += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : piece }

    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
653
def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate an instruction with a destination and no source vector.

    `op` computes destElem for every element (the callers in this file
    reference `imm` in it).  Destination pieces rCount..3 are written
    as 0.

    readDest -- load the destination first and seed destElem from it.
    """
    global header_output, exec_output
    body = simd64EnabledCheckCode + '''
    RegVect destReg;
    '''
    seedDest = ''
    if readDest:
        body += "".join('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : piece } for piece in range(rCount))
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    body += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest }

    # Write back the result pieces, then zero the upper half (if any).
    body += "".join('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : piece } for piece in range(rCount))
    body += "".join('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : piece } for piece in range(rCount, 4))

    iop = InstObjParams(name, Name,
                        "DataXImmOnlyOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOnlyOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
695
def dupGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Generate an instruction replicating a GPR into every element.

    gprSpec is the prefix placed before "Op1" to name the
    general-purpose source operand; its value is cast to Element and
    stored in each destination element.  Destination pieces rCount..3
    are written as 0.
    """
    global header_output, exec_output
    fillLoop = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element) %sOp1);
    }
    ''' % gprSpec
    storeTemplate = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    zeroTemplate = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    fragments = [simd64EnabledCheckCode + fillLoop]
    fragments.extend(storeTemplate % { "reg" : piece }
                     for piece in range(rCount))
    # Empty when rCount is already 4.
    fragments.extend(zeroTemplate % { "reg" : piece }
                     for piece in range(rCount, 4))
    body = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
724
def extInstX(name, Name, opClass, types, rCount, op):
    """Generate a two-source, immediate-carrying instruction whose whole
    body is `op`.

    `op` is inserted verbatim between the register loads and the
    write-back, so it must fill destReg from srcReg1/srcReg2 itself.
    Destination pieces rCount..3 are written as 0.
    """
    global header_output, exec_output
    loadTemplate = '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    '''
    storeTemplate = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    zeroTemplate = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    fragments = [simd64EnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    fragments.extend(loadTemplate % { "reg" : piece }
                     for piece in range(rCount))
    fragments.append(op)
    fragments.extend(storeTemplate % { "reg" : piece }
                     for piece in range(rCount))
    # zero upper half (empty when rCount is already 4)
    fragments.extend(zeroTemplate % { "reg" : piece }
                     for piece in range(rCount, 4))
    body = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "DataX2RegImmOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
756
def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Generate an instruction inserting a GPR into one vector element.

    The destination's rCount pieces are read, element `imm` is replaced
    by the general-purpose operand named gprSpec + "Op1" (cast to
    Element), and the same pieces are written back.  No other
    destination pieces are written.
    """
    global header_output, exec_output
    loadTemplate = '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    '''
    storeTemplate = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    fragments = [simd64EnabledCheckCode + '''
    RegVect destReg;
    ''']
    fragments.extend(loadTemplate % { "reg" : piece }
                     for piece in range(rCount))
    fragments.append('''
    destReg.elements[imm] = htog((Element) %sOp1);
    ''' % gprSpec)
    fragments.extend(storeTemplate % { "reg" : piece }
                     for piece in range(rCount))
    body = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
784
def insToGprInstX(name, Name, opClass, types, rCount, gprSpec,
                  signExt=False):
    """Generate an instruction moving one vector element to a GPR.

    The source is read as a full 4-piece vector and element `imm` is
    assigned to the general-purpose destination named gprSpec + "Dest".
    rCount is only forwarded to the instruction parameters.

    signExt -- sign-extend the element from its own bit width.
    """
    global header_output, exec_output
    body = simd64EnabledCheckCode + '''
    FullRegVect srcReg;
    '''
    body += "".join('''
    srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % { "reg" : piece } for piece in range(4))
    # NOTE(review): unlike the other generators, the element is used
    # here without gtoh() - confirm whether that is intentional.
    moveTemplate = '''
    %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]);
    ''' if signExt else '''
    %sDest = srcReg.elements[imm];
    '''
    body += moveTemplate % gprSpec

    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp",
                        { "code": body,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
814
def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount):
    """Generate a byte table-lookup instruction.

    A 64-byte table is filled from `length` source vectors (four
    register pieces each); unused table entries are zero.  Every byte
    of the second operand is used as an index: in-range indices
    (below 16 * length) select a table byte, out-of-range indices
    write 0 when isTbl is true and leave the destination byte
    unchanged otherwise.  Destination pieces rCount..3 are written
    as 0.

    length -- number of table vectors, substituted with %d.
    isTbl  -- text of a C++ boolean expression, substituted with %s.
    rCount -- number of register pieces in the index/destination.
    """
    global header_output, exec_output
    code = simd64EnabledCheckCode + '''
    union
    {
        uint8_t bytes[64];
        FloatRegBits regs[16];
    } table;

    union
    {
        uint8_t bytes[%(rCount)d * 4];
        FloatRegBits regs[%(rCount)d];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isTbl = %(isTbl)s;
    ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl }
    for reg in range(rCount):
        code += '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    for reg in range(16):
        if reg < length * 4:
            # Table entry `reg` is piece (reg % 4) of table vector
            # (reg // 4).  Floor division keeps the vector index an
            # int under both Python 2 and Python 3.
            code += '''
    table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw);
    ''' % { "reg" : reg, "p" : reg % 4, "v" : reg // 4 }
        else:
            code += '''
    table.regs[%(reg)d] = 0;
    ''' % { "reg" : reg }
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 16 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isTbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }
    '''
    for reg in range(rCount):
        code += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    for reg in range(rCount, 4):  # zero upper half
        code += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "DataX2RegOp",
                        { "code": code,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonXExecDeclare.subst(substDict)
879
# In the definitions below each instruction is generated twice where
# both forms exist: the "...DX" class passes rCount=2 and the "...QX"
# class passes rCount=4.

# ABS: destElem is srcElem1 with its sign removed.
absCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''
twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode)
twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode)
# ADD: element-wise sum.
addCode = "destElem = srcElem1 + srcElem2;"
threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode)
threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode)
# ADDHN, ADDHN2: add at BigElement width, then shift right by the
# Element bit width so only the upper half of the sum is kept.
addhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes,
                    addhnCode)
threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes,
                    addhnCode, hi=True)
# ADDP (scalar): reuses addCode on source elements 0 and 1.
twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4,
                      addCode)
# ADDP (vector): reuses addCode on adjacent element pairs.
threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2,
                   addCode, pairwise=True)
threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4,
                   addCode, pairwise=True)
# ADDV: accumulate every source element into destElem.
# Note: SimdAddOp can be a bit optimistic here
addAcrossCode = "destElem += srcElem1;"
twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"),
                  2, addAcrossCode)
twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4,
                  addAcrossCode)
# The bitwise operations below are all instantiated for "uint64_t"
# elements only.

# AND: bitwise and of the two sources.
andCode = "destElem = srcElem1 & srcElem2;"
threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode)
threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode)
# BIC (immediate): clear the bits of destElem that are set in imm
# (readDest=True, so destElem starts as the old destination value).
bicImmCode = "destElem &= ~imm;"
oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2,
               bicImmCode, True)
oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4,
               bicImmCode, True)
# BIC (register): srcElem1 with the bits set in srcElem2 cleared.
bicCode = "destElem = srcElem1 & ~srcElem2;"
threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode)
threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode)
# BIF: keep destElem bits where srcElem2 is 1, take srcElem1 bits
# where srcElem2 is 0 (readDest=True).
bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode,
                   True)
threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode,
                   True)
# BIT: take srcElem1 bits where srcElem2 is 1, keep destElem bits
# where srcElem2 is 0 (readDest=True).
bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode,
                   True)
threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode,
                   True)
# BSL: the old destElem acts as the mask, selecting srcElem1 bits
# where it is 1 and srcElem2 bits where it is 0 (readDest=True).
bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode,
                   True)
threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode,
                   True)
# CLS: count the bits directly below the sign bit that are equal to it.  The
# generated loop shifts the element left one bit at a time and stops at the
# first differing bit, or after sizeof(Element) * 8 - 1 iterations.
clsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
for _cls, _regs in (("ClsDX", 2), ("ClsQX", 4)):
    twoEqualRegInstX("cls", _cls, "SimdAluOp", smallSignedTypes, _regs,
                     clsCode)
# CLZ: count leading zero bits.  The element is treated as signed so that
# "top bit clear" can be tested as srcElem1 >= 0 while shifting left.
clzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
for _cls, _regs in (("ClzDX", 2), ("ClzQX", 4)):
    twoEqualRegInstX("clz", _cls, "SimdAluOp", smallSignedTypes, _regs,
                     clzCode)
# Integer compares: every snippet writes an all-ones element, (Element)(-1),
# when the comparison holds and 0 otherwise.
# CMEQ (register)
cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmeqDX", 2), ("CmeqQX", 4)):
    threeEqualRegInstX("cmeq", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmeqCode)
# CMEQ (zero)
cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmeqZeroDX", 2), ("CmeqZeroQX", 4)):
    twoEqualRegInstX("cmeq", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmeqZeroCode)
# CMGE (register): signed greater-than-or-equal.
cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgeDX", 2), ("CmgeQX", 4)):
    threeEqualRegInstX("cmge", _cls, "SimdCmpOp", signedTypes, _regs,
                       cmgeCode)
# CMGE (zero)
cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgeZeroDX", 2), ("CmgeZeroQX", 4)):
    twoEqualRegInstX("cmge", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmgeZeroCode)
# CMGT (register): signed greater-than.
cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgtDX", 2), ("CmgtQX", 4)):
    threeEqualRegInstX("cmgt", _cls, "SimdCmpOp", signedTypes, _regs,
                       cmgtCode)
# CMGT (zero)
cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmgtZeroDX", 2), ("CmgtZeroQX", 4)):
    twoEqualRegInstX("cmgt", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmgtZeroCode)
# CMHI (register): reuses the CMGT snippet, instantiated over the unsigned
# element types so the comparison is unsigned.
for _cls, _regs in (("CmhiDX", 2), ("CmhiQX", 4)):
    threeEqualRegInstX("cmhi", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmgtCode)
# CMHS (register): reuses the CMGE snippet over the unsigned element types.
for _cls, _regs in (("CmhsDX", 2), ("CmhsQX", 4)):
    threeEqualRegInstX("cmhs", _cls, "SimdCmpOp", unsignedTypes, _regs,
                       cmgeCode)
# CMLE (zero)
cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmleZeroDX", 2), ("CmleZeroQX", 4)):
    twoEqualRegInstX("cmle", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmleZeroCode)
# CMLT (zero)
cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmltZeroDX", 2), ("CmltZeroQX", 4)):
    twoEqualRegInstX("cmlt", _cls, "SimdCmpOp", signedTypes, _regs,
                     cmltZeroCode)
# CMTST (register): all-ones when the two sources share at least one set bit.
tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;"
for _cls, _regs in (("CmtstDX", 2), ("CmtstQX", 4)):
    threeEqualRegInstX("cmtst", _cls, "SimdAluOp", unsignedTypes, _regs,
                       tstCode)
# CNT: population count of each byte.  The generated loop adds the low bit
# and shifts right until the element is zero.
cntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''
for _cls, _regs in (("CntDX", 2), ("CntQX", 4)):
    twoEqualRegInstX("cnt", _cls, "SimdAluOp", ("uint8_t",), _regs, cntCode)
# DUP (element): plain element copy; the isDup/byElem flags are passed
# through to the helper.
dupCode = "destElem = srcElem1;"
for _cls, _types, _regs in (("DupElemDX", smallUnsignedTypes, 2),
                            ("DupElemQX", unsignedTypes, 4)):
    twoEqualRegInstX("dup", _cls, "SimdMiscOp", _types, _regs, dupCode,
                     isDup=True, byElem=True)
twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4,
                 dupCode, isDup=True, byElem=True, scalar=True)
# DUP (general register): the final argument is 'W' or 'X' (presumably the
# width of the general-purpose source register).
for _cls, _types, _regs, _width in (
        ("DupGprWDX", smallUnsignedTypes, 2, 'W'),
        ("DupGprWQX", smallUnsignedTypes, 4, 'W'),
        ("DupGprXQX", ("uint64_t",), 4, 'X')):
    dupGprInstX("dup", _cls, "SimdMiscOp", _types, _regs, _width)
# EOR: element-wise bitwise exclusive OR of the two source operands.
eorCode = "destElem = srcElem1 ^ srcElem2;"
for _cls, _regs in (("EorDX", 2), ("EorQX", 4)):
    threeEqualRegInstX("eor", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       eorCode)
# EXT: destination element i is element (i + imm) of the concatenation of
# the two sources: indices below eCount come from srcReg1, the rest from
# srcReg2.  An index that is still out of range after subtracting eCount
# raises an UndefinedInstruction fault.
extCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = std::make_shared<UndefinedInstruction>(
                            machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
# The mnemonic was previously registered as "Ext"; use the lower-case
# architectural mnemonic, matching neighbouring registrations such as "eor"
# and "dup".
extInstX("ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode)
extInstX("ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode)
# Common wrapper for floating-point snippets: read FpscrExc into a local
# fpscr, assign the substituted expression to destElem, then write fpscr
# back to FpscrExc.
fpOp = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destElem = %s;
    FpscrExc = fpscr;
'''
# FABD: absolute value of the difference of the two sources.
fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
for _cls, _types, _regs in (("FabdDX", smallFloatTypes, 2),
                            ("FabdQX", floatTypes, 4)):
    threeEqualRegInstX("fabd", _cls, "SimdFloatAddOp", _types, _regs,
                       fabdCode)
threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4,
                   fabdCode, scalar=True)
# FABS: floating-point absolute value of each source element.
fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
# The mnemonic was previously registered as "Abs"; use the architectural
# mnemonic "fabs", in line with registrations such as "fabd" and "fadd".
twoEqualRegInstX("fabs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fabsCode)
twoEqualRegInstX("fabs", "FabsQX", "SimdFloatAluOp", floatTypes, 4,
                 fabsCode)
# Template for absolute-value compares: the remaining %s picks the fplib
# compare ("GE"/"GT"); the result is -1 when it holds, 0 otherwise.
fpCmpAbsOp = fpOp % (
    "fplibCompare%s<Element>(fplibAbs<Element>(srcElem1),"
    " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
# FACGE: |srcElem1| >= |srcElem2|
facgeCode = fpCmpAbsOp % "GE"
for _cls, _types, _regs in (("FacgeDX", smallFloatTypes, 2),
                            ("FacgeQX", floatTypes, 4)):
    threeEqualRegInstX("facge", _cls, "SimdFloatCmpOp", _types, _regs,
                       facgeCode)
threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgeCode, scalar=True)
# FACGT: |srcElem1| > |srcElem2|
facgtCode = fpCmpAbsOp % "GT"
for _cls, _types, _regs in (("FacgtDX", smallFloatTypes, 2),
                            ("FacgtQX", floatTypes, 4)):
    threeEqualRegInstX("facgt", _cls, "SimdFloatCmpOp", _types, _regs,
                       facgtCode)
threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   facgtCode, scalar=True)
# Template for two-operand fplib calls; the remaining %s is the operation
# suffix appended to "fplib" (e.g. "Add").
fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
# FADD
faddCode = fpBinOp % "Add"
for _cls, _types, _regs in (("FaddDX", smallFloatTypes, 2),
                            ("FaddQX", floatTypes, 4)):
    threeEqualRegInstX("fadd", _cls, "SimdFloatAddOp", _types, _regs,
                       faddCode)
# FADDP (scalar): same add snippet, registered via the pairwise-scalar helper.
for _cls, _types, _regs in (("FaddpScDX", ("uint32_t",), 2),
                            ("FaddpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("faddp", _cls, "SimdFloatAddOp", _types, _regs,
                          faddCode)
# FADDP (vector): same add snippet with pairwise=True.
for _cls, _types, _regs in (("FaddpDX", smallFloatTypes, 2),
                            ("FaddpQX", floatTypes, 4)):
    threeEqualRegInstX("faddp", _cls, "SimdFloatAddOp", _types, _regs,
                       faddCode, pairwise=True)
# Template for register-register FP compares: -1 when the selected fplib
# compare holds, 0 otherwise.
fpCmpOp = fpOp % (
    "fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?"
    " -1 : 0")
# FCMEQ (register)
fcmeqCode = fpCmpOp % "EQ"
for _cls, _types, _regs in (("FcmeqDX", smallFloatTypes, 2),
                            ("FcmeqQX", floatTypes, 4)):
    threeEqualRegInstX("fcmeq", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmeqCode)
threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmeqCode, scalar=True)
# Template for compares against zero (zero is the second operand).
fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
# FCMEQ (zero)
fcmeqZeroCode = fpCmpZeroOp % "EQ"
for _cls, _types, _regs in (("FcmeqZeroDX", smallFloatTypes, 2),
                            ("FcmeqZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmeq", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmeqZeroCode)
twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmeqZeroCode, scalar=True)
# FCMGE (register)
fcmgeCode = fpCmpOp % "GE"
for _cls, _types, _regs in (("FcmgeDX", smallFloatTypes, 2),
                            ("FcmgeQX", floatTypes, 4)):
    threeEqualRegInstX("fcmge", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmgeCode)
threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgeCode, scalar=True)
# FCMGE (zero)
fcmgeZeroCode = fpCmpZeroOp % "GE"
for _cls, _types, _regs in (("FcmgeZeroDX", smallFloatTypes, 2),
                            ("FcmgeZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmge", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmgeZeroCode)
twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgeZeroCode, scalar=True)
# FCMGT (register)
fcmgtCode = fpCmpOp % "GT"
for _cls, _types, _regs in (("FcmgtDX", smallFloatTypes, 2),
                            ("FcmgtQX", floatTypes, 4)):
    threeEqualRegInstX("fcmgt", _cls, "SimdFloatCmpOp", _types, _regs,
                       fcmgtCode)
threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4,
                   fcmgtCode, scalar=True)
# FCMGT (zero)
fcmgtZeroCode = fpCmpZeroOp % "GT"
for _cls, _types, _regs in (("FcmgtZeroDX", smallFloatTypes, 2),
                            ("FcmgtZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmgt", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmgtZeroCode)
twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmgtZeroCode, scalar=True)
# Template with the operands reversed (zero first), so that "<=" and "<"
# against zero can be expressed with the GE and GT fplib compares.
fpCmpRevZeroOp = fpOp % (
    "fplibCompare%s<Element>(0, srcElem1, fpscr) ?"
    " -1 : 0")
# FCMLE (zero): 0 >= srcElem1
fcmleZeroCode = fpCmpRevZeroOp % "GE"
for _cls, _types, _regs in (("FcmleZeroDX", smallFloatTypes, 2),
                            ("FcmleZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmle", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmleZeroCode)
twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmleZeroCode, scalar=True)
# FCMLT (zero): 0 > srcElem1
fcmltZeroCode = fpCmpRevZeroOp % "GT"
for _cls, _types, _regs in (("FcmltZeroDX", smallFloatTypes, 2),
                            ("FcmltZeroQX", floatTypes, 4)):
    twoEqualRegInstX("fcmlt", _cls, "SimdFloatCmpOp", _types, _regs,
                     fcmltZeroCode)
twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4,
                 fcmltZeroCode, scalar=True)
# Template for FP -> integer/fixed-point conversions.  The three remaining
# %s slots are filled per instruction: "0" or "imm", then "false" or "true"
# (the signed S forms pass "false", the unsigned U forms "true"), then the
# rounding mode.
fcvtCode = fpOp % (
    "fplibFPToFixed<Element, Element>("
    "srcElem1, %s, %s, %s, fpscr)")
# FCVTAS
fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
for _cls, _types, _regs in (("FcvtasDX", smallFloatTypes, 2),
                            ("FcvtasQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtas", _cls, "SimdCvtOp", _types, _regs,
                     fcvtasCode)
twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4,
                 fcvtasCode, scalar=True)
# FCVTAU
fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
for _cls, _types, _regs in (("FcvtauDX", smallFloatTypes, 2),
                            ("FcvtauQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtau", _cls, "SimdCvtOp", _types, _regs,
                     fcvtauCode)
twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4,
                 fcvtauCode, scalar=True)
# FCVTL, FCVTL2: convert Element to BigElement using the rounding mode
# taken from fpscr; the "2" form is registered with hi=True.
fcvtlCode = fpOp % (
    "fplibConvert<Element, BigElement>("
    "srcElem1, FPCRRounding(fpscr), fpscr)")
_fcvtlTypes = ("uint16_t", "uint32_t")
twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", _fcvtlTypes, fcvtlCode)
twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", _fcvtlTypes, fcvtlCode,
                hi=True)
# FCVTMS: conversion with FPRounding_NEGINF, "false" variant.
fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
for _cls, _types, _regs in (("FcvtmsDX", smallFloatTypes, 2),
                            ("FcvtmsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtms", _cls, "SimdCvtOp", _types, _regs,
                     fcvtmsCode)
twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmsCode, scalar=True)
# FCVTMU: conversion with FPRounding_NEGINF, "true" variant.
fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
for _cls, _types, _regs in (("FcvtmuDX", smallFloatTypes, 2),
                            ("FcvtmuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtmu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtmuCode)
twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtmuCode, scalar=True)
# FCVTN, FCVTN2: convert BigElement to Element using the rounding mode
# taken from fpscr; the "2" form is registered with hi=True.
fcvtnCode = fpOp % (
    "fplibConvert<BigElement, Element>("
    "srcElem1, FPCRRounding(fpscr), fpscr)")
_fcvtnTypes = ("uint16_t", "uint32_t")
twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", _fcvtnTypes, fcvtnCode)
twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", _fcvtnTypes, fcvtnCode,
                  hi=True)
# FCVTNS: FPRounding_TIEEVEN, "false" variant.
fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
for _cls, _types, _regs in (("FcvtnsDX", smallFloatTypes, 2),
                            ("FcvtnsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtns", _cls, "SimdCvtOp", _types, _regs,
                     fcvtnsCode)
twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnsCode, scalar=True)
# FCVTNU: FPRounding_TIEEVEN, "true" variant.
fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
for _cls, _types, _regs in (("FcvtnuDX", smallFloatTypes, 2),
                            ("FcvtnuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtnu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtnuCode)
twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtnuCode, scalar=True)
# FCVTPS: FPRounding_POSINF, "false" variant.
fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
for _cls, _types, _regs in (("FcvtpsDX", smallFloatTypes, 2),
                            ("FcvtpsQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtps", _cls, "SimdCvtOp", _types, _regs,
                     fcvtpsCode)
twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpsCode, scalar=True)
# FCVTPU: FPRounding_POSINF, "true" variant.
fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
for _cls, _types, _regs in (("FcvtpuDX", smallFloatTypes, 2),
                            ("FcvtpuQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtpu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtpuCode)
twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4,
                 fcvtpuCode, scalar=True)
# FCVTXN, FCVTXN2: narrowing conversion that always uses FPRounding_ODD.
# Registered in plain, hi=True and scalar=True forms.
fcvtxnCode = fpOp % (
    "fplibConvert<BigElement, Element>("
    "srcElem1, FPRounding_ODD, fpscr)")
twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode)
twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, hi=True)
twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes,
                  fcvtxnCode, scalar=True)
# FCVTZS / FCVTZU: conversions with FPRounding_ZERO.  The fixed-point forms
# pass "imm" as the first fplibFPToFixed parameter and are registered with
# hasImm=True; the integer forms pass "0".
# FCVTZS (fixed-point)
fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzsFixedDX", smallFloatTypes, 2),
                            ("FcvtzsFixedQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzs", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzsCode, hasImm=True)
twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsCode, hasImm=True, scalar=True)
# FCVTZS (integer)
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzsIntDX", smallFloatTypes, 2),
                            ("FcvtzsIntQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzs", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzsIntCode)
twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzsIntCode, scalar=True)
# FCVTZU (fixed-point)
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzuFixedDX", smallFloatTypes, 2),
                            ("FcvtzuFixedQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzuCode, hasImm=True)
twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuCode, hasImm=True, scalar=True)
# FCVTZU (integer)
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
for _cls, _types, _regs in (("FcvtzuIntDX", smallFloatTypes, 2),
                            ("FcvtzuIntQX", floatTypes, 4)):
    twoEqualRegInstX("fcvtzu", _cls, "SimdCvtOp", _types, _regs,
                     fcvtzuIntCode)
twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4,
                 fcvtzuIntCode, scalar=True)
# FDIV: two-operand fplibDiv on each element pair.
fdivCode = fpBinOp % "Div"
for _cls, _types, _regs in (("FdivDX", smallFloatTypes, 2),
                            ("FdivQX", floatTypes, 4)):
    threeEqualRegInstX("fdiv", _cls, "SimdFloatDivOp", _types, _regs,
                       fdivCode)
# FMAX
fmaxCode = fpBinOp % "Max"
for _cls, _types, _regs in (("FmaxDX", smallFloatTypes, 2),
                            ("FmaxQX", floatTypes, 4)):
    threeEqualRegInstX("fmax", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxCode)
# FMAXNM
fmaxnmCode = fpBinOp % "MaxNum"
for _cls, _types, _regs in (("FmaxnmDX", smallFloatTypes, 2),
                            ("FmaxnmQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnm", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxnmCode)
# FMAXNMP (scalar)
for _cls, _types, _regs in (("FmaxnmpScDX", ("uint32_t",), 2),
                            ("FmaxnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fmaxnmCode)
# FMAXNMP (vector)
for _cls, _types, _regs in (("FmaxnmpDX", smallFloatTypes, 2),
                            ("FmaxnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxnmCode, pairwise=True)
# FMAXNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
# Template for across-vector reductions: the running result in destElem is
# combined with each source element.
fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fmaxnmAcrossCode)
# FMAXP (scalar)
for _cls, _types, _regs in (("FmaxpScDX", ("uint32_t",), 2),
                            ("FmaxpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fmaxp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fmaxCode)
# FMAXP (vector)
for _cls, _types, _regs in (("FmaxpDX", smallFloatTypes, 2),
                            ("FmaxpQX", floatTypes, 4)):
    threeEqualRegInstX("fmaxp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fmaxCode, pairwise=True)
# FMAXV
# Note: SimdFloatCmpOp can be a bit optimistic here
fmaxAcrossCode = fpAcrossOp % "Max"
twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fmaxAcrossCode)
# FMIN
fminCode = fpBinOp % "Min"
for _cls, _types, _regs in (("FminDX", smallFloatTypes, 2),
                            ("FminQX", floatTypes, 4)):
    threeEqualRegInstX("fmin", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminCode)
# FMINNM
fminnmCode = fpBinOp % "MinNum"
for _cls, _types, _regs in (("FminnmDX", smallFloatTypes, 2),
                            ("FminnmQX", floatTypes, 4)):
    threeEqualRegInstX("fminnm", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminnmCode)
# FMINNMP (scalar)
for _cls, _types, _regs in (("FminnmpScDX", ("uint32_t",), 2),
                            ("FminnmpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fminnmCode)
# FMINNMP (vector)
for _cls, _types, _regs in (("FminnmpDX", smallFloatTypes, 2),
                            ("FminnmpQX", floatTypes, 4)):
    threeEqualRegInstX("fminnmp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminnmCode, pairwise=True)
# FMINNMV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminnmAcrossCode = fpAcrossOp % "MinNum"
twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
                  4, fminnmAcrossCode)
# FMINP (scalar)
for _cls, _types, _regs in (("FminpScDX", ("uint32_t",), 2),
                            ("FminpScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("fminp", _cls, "SimdFloatCmpOp", _types, _regs,
                          fminCode)
# FMINP (vector)
for _cls, _types, _regs in (("FminpDX", smallFloatTypes, 2),
                            ("FminpQX", floatTypes, 4)):
    threeEqualRegInstX("fminp", _cls, "SimdFloatCmpOp", _types, _regs,
                       fminCode, pairwise=True)
# FMINV
# Note: SimdFloatCmpOp can be a bit optimistic here
fminAcrossCode = fpAcrossOp % "Min"
twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
                  fminAcrossCode)
# FMLA: fplibMulAdd with the destination element as the addend.
fmlaCode = fpOp % (
    "fplibMulAdd<Element>("
    "destElem, srcElem1, srcElem2, fpscr)")
# FMLA (by element)
for _cls, _types, _regs in (("FmlaElemDX", smallFloatTypes, 2),
                            ("FmlaElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmla", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlaCode, True, byElem=True)
threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlaCode, True, byElem=True, scalar=True)
# FMLA (vector)
for _cls, _types, _regs in (("FmlaDX", smallFloatTypes, 2),
                            ("FmlaQX", floatTypes, 4)):
    threeEqualRegInstX("fmla", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlaCode, True)
# FMLS: same as FMLA but with the first source negated before the
# multiply-add.
fmlsCode = fpOp % (
    "fplibMulAdd<Element>(destElem,"
    " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
# FMLS (by element)
for _cls, _types, _regs in (("FmlsElemDX", smallFloatTypes, 2),
                            ("FmlsElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlsCode, True, byElem=True)
threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
                   4, fmlsCode, True, byElem=True, scalar=True)
# FMLS (vector)
for _cls, _types, _regs in (("FmlsDX", smallFloatTypes, 2),
                            ("FmlsQX", floatTypes, 4)):
    threeEqualRegInstX("fmls", _cls, "SimdFloatMultAccOp", _types, _regs,
                       fmlsCode, True)
# FMOV: write the immediate into every destination element.
fmovCode = 'destElem = imm;'
for _cls, _types, _regs in (("FmovDX", smallFloatTypes, 2),
                            ("FmovQX", floatTypes, 4)):
    oneRegImmInstX("fmov", _cls, "SimdMiscOp", _types, _regs, fmovCode)
# FMUL (by element)
fmulCode = fpBinOp % "Mul"
for _cls, _types, _regs in (("FmulElemDX", smallFloatTypes, 2),
                            ("FmulElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulCode, byElem=True)
threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulCode, byElem=True, scalar=True)
# FMUL (vector)
for _cls, _types, _regs in (("FmulDX", smallFloatTypes, 2),
                            ("FmulQX", floatTypes, 4)):
    threeEqualRegInstX("fmul", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulCode)
# FMULX
fmulxCode = fpBinOp % "MulX"
for _cls, _types, _regs in (("FmulxDX", smallFloatTypes, 2),
                            ("FmulxQX", floatTypes, 4)):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulxCode)
threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
                   fmulxCode, scalar=True)
# FMULX (by element)
for _cls, _types, _regs in (("FmulxElemDX", smallFloatTypes, 2),
                            ("FmulxElemQX", floatTypes, 4)):
    threeEqualRegInstX("fmulx", _cls, "SimdFloatMultOp", _types, _regs,
                       fmulxCode, byElem=True)
threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
                   4, fmulxCode, byElem=True, scalar=True)
# FNEG: floating-point negation of each source element.
fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
# The mnemonic was previously registered as "Neg"; use the architectural
# mnemonic "fneg", in line with registrations such as "fmul" and "fsub".
twoEqualRegInstX("fneg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fnegCode)
twoEqualRegInstX("fneg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
                 fnegCode)
# FRECPE: reciprocal estimate via fplibRecipEstimate.
frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FrecpeDX", smallFloatTypes, 2),
                            ("FrecpeQX", floatTypes, 4)):
    twoEqualRegInstX("frecpe", _cls, "SimdFloatMultAccOp", _types, _regs,
                     frecpeCode)
twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
                 4, frecpeCode, scalar=True)
# FRECPS: two-operand fplibRecipStepFused.
frecpsCode = fpBinOp % "RecipStepFused"
for _cls, _types, _regs in (("FrecpsDX", smallFloatTypes, 2),
                            ("FrecpsQX", floatTypes, 4)):
    threeEqualRegInstX("frecps", _cls, "SimdFloatMultAccOp", _types, _regs,
                       frecpsCode)
threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
                   4, frecpsCode, scalar=True)
# FRECPX: only a scalar form is registered here.
frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
                 frecpxCode, scalar=True)
# Template for round-to-integral: the two remaining %s slots are the
# rounding mode and a "true"/"false" flag ("true" only for FRINTX below).
frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
# FRINTA
frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
for _cls, _types, _regs in (("FrintaDX", smallFloatTypes, 2),
                            ("FrintaQX", floatTypes, 4)):
    twoEqualRegInstX("frinta", _cls, "SimdCvtOp", _types, _regs,
                     frintaCode)
# FRINTI: rounding mode taken from fpscr.
frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
for _cls, _types, _regs in (("FrintiDX", smallFloatTypes, 2),
                            ("FrintiQX", floatTypes, 4)):
    twoEqualRegInstX("frinti", _cls, "SimdCvtOp", _types, _regs,
                     frintiCode)
# FRINTM
frintmCode = frintCode % ("FPRounding_NEGINF", "false")
for _cls, _types, _regs in (("FrintmDX", smallFloatTypes, 2),
                            ("FrintmQX", floatTypes, 4)):
    twoEqualRegInstX("frintm", _cls, "SimdCvtOp", _types, _regs,
                     frintmCode)
# FRINTN
frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
for _cls, _types, _regs in (("FrintnDX", smallFloatTypes, 2),
                            ("FrintnQX", floatTypes, 4)):
    twoEqualRegInstX("frintn", _cls, "SimdCvtOp", _types, _regs,
                     frintnCode)
# FRINTP
frintpCode = frintCode % ("FPRounding_POSINF", "false")
for _cls, _types, _regs in (("FrintpDX", smallFloatTypes, 2),
                            ("FrintpQX", floatTypes, 4)):
    twoEqualRegInstX("frintp", _cls, "SimdCvtOp", _types, _regs,
                     frintpCode)
# FRINTX: rounding mode taken from fpscr, flag set to "true".
frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
for _cls, _types, _regs in (("FrintxDX", smallFloatTypes, 2),
                            ("FrintxQX", floatTypes, 4)):
    twoEqualRegInstX("frintx", _cls, "SimdCvtOp", _types, _regs,
                     frintxCode)
# FRINTZ
frintzCode = frintCode % ("FPRounding_ZERO", "false")
for _cls, _types, _regs in (("FrintzDX", smallFloatTypes, 2),
                            ("FrintzQX", floatTypes, 4)):
    twoEqualRegInstX("frintz", _cls, "SimdCvtOp", _types, _regs,
                     frintzCode)
# FRSQRTE: reciprocal square-root estimate via fplibRSqrtEstimate.
frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FrsqrteDX", smallFloatTypes, 2),
                            ("FrsqrteQX", floatTypes, 4)):
    twoEqualRegInstX("frsqrte", _cls, "SimdFloatSqrtOp", _types, _regs,
                     frsqrteCode)
twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
                 frsqrteCode, scalar=True)
# FRSQRTS: two-operand fplibRSqrtStepFused.
frsqrtsCode = fpBinOp % "RSqrtStepFused"
for _cls, _types, _regs in (("FrsqrtsDX", smallFloatTypes, 2),
                            ("FrsqrtsQX", floatTypes, 4)):
    threeEqualRegInstX("frsqrts", _cls, "SimdFloatMiscOp", _types, _regs,
                       frsqrtsCode)
threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
                   4, frsqrtsCode, scalar=True)
# FSQRT
fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
for _cls, _types, _regs in (("FsqrtDX", smallFloatTypes, 2),
                            ("FsqrtQX", floatTypes, 4)):
    twoEqualRegInstX("fsqrt", _cls, "SimdFloatSqrtOp", _types, _regs,
                     fsqrtCode)
# FSUB: two-operand fplibSub on each element pair.
fsubCode = fpBinOp % "Sub"
for _cls, _types, _regs in (("FsubDX", smallFloatTypes, 2),
                            ("FsubQX", floatTypes, 4)):
    threeEqualRegInstX("fsub", _cls, "SimdFloatAddOp", _types, _regs,
                       fsubCode)
# INS (element)
insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
# INS (general register): one registration per source register kind; the
# final argument is 'W' or 'X'.
for _cls, _types, _width in (("InsGprWX", smallUnsignedTypes, 'W'),
                             ("InsGprXX", unsignedTypes, 'X')):
    insFromGprInstX("ins", _cls, "SimdMiscOp", _types, 4, _width)
# MLA: accumulate the product of the sources into the destination.
mlaCode = "destElem += srcElem1 * srcElem2;"
# MLA (by element): only the 16- and 32-bit element types are registered.
for _cls, _regs in (("MlaElemDX", 2), ("MlaElemQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regs, mlaCode, True,
                       byElem=True)
# MLA (vector)
for _cls, _regs in (("MlaDX", 2), ("MlaQX", 4)):
    threeEqualRegInstX("mla", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regs, mlaCode, True)
# MLS: subtract the product of the sources from the destination.
mlsCode = "destElem -= srcElem1 * srcElem2;"
# MLS (by element)
for _cls, _regs in (("MlsElemDX", 2), ("MlsElemQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp",
                       ("uint16_t", "uint32_t"), _regs, mlsCode, True,
                       byElem=True)
# MLS (vector)
for _cls, _regs in (("MlsDX", 2), ("MlsQX", 4)):
    threeEqualRegInstX("mls", _cls, "SimdMultAccOp", smallUnsignedTypes,
                       _regs, mlsCode, True)
# The MOV forms are aliases and get no registration of their own:
#   MOV (element)      -> alias to INS (element)
#   MOV (from general) -> alias to INS (general register)
#   MOV (scalar)       -> alias to DUP (element)
#   MOV (to general)   -> alias to UMOV
#   MOV (vector)       -> alias to ORR (register)
# MOVI: write the immediate into every destination element.
movImmCode = "destElem = imm;"
for _cls, _regs in (("MoviDX", 2), ("MoviQX", 4)):
    oneRegImmInstX("movi", _cls, "SimdMiscOp", ("uint64_t",), _regs,
                   movImmCode)
# MUL: element-wise product of the two sources.
mulCode = "destElem = srcElem1 * srcElem2;"
# MUL (by element): only the 16- and 32-bit element types are registered.
for _cls, _regs in (("MulElemDX", 2), ("MulElemQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp", ("uint16_t", "uint32_t"),
                       _regs, mulCode, byElem=True)
# MUL (vector)
for _cls, _regs in (("MulDX", 2), ("MulQX", 4)):
    threeEqualRegInstX("mul", _cls, "SimdMultOp", smallUnsignedTypes, _regs,
                       mulCode)
# MVN: bitwise complement of the source.
mvnCode = "destElem = ~srcElem1;"
for _cls, _regs in (("MvnDX", 2), ("MvnQX", 4)):
    twoEqualRegInstX("mvn", _cls, "SimdAluOp", ("uint64_t",), _regs,
                     mvnCode)
# MVNI: bitwise complement of the immediate.
mvniCode = "destElem = ~imm;"
for _cls, _regs in (("MvniDX", 2), ("MvniQX", 4)):
    oneRegImmInstX("mvni", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   mvniCode)
# NEG: arithmetic negation over the signed element types.
negCode = "destElem = -srcElem1;"
for _cls, _regs in (("NegDX", 2), ("NegQX", 4)):
    twoEqualRegInstX("neg", _cls, "SimdAluOp", signedTypes, _regs, negCode)
# NOT -> alias to MVN
# ORN: first source OR-ed with the complement of the second.
ornCode = "destElem = srcElem1 | ~srcElem2;"
for _cls, _regs in (("OrnDX", 2), ("OrnQX", 4)):
    threeEqualRegInstX("orn", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       ornCode)
# ORR (immediate): OR the immediate into the destination.
orrImmCode = "destElem |= imm;"
for _cls, _regs in (("OrrImmDX", 2), ("OrrImmQX", 4)):
    oneRegImmInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   orrImmCode, True)
# ORR (register)
orrCode = "destElem = srcElem1 | srcElem2;"
for _cls, _regs in (("OrrDX", 2), ("OrrQX", 4)):
    threeEqualRegInstX("orr", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       orrCode)
# PMUL: carry-less multiply.  For every set bit j of srcElem2, srcElem1
# shifted left by j is XOR-ed into the result.
pmulCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
for _cls, _regs in (("PmulDX", 2), ("PmulQX", 4)):
    threeEqualRegInstX("pmul", _cls, "SimdMultOp", ("uint8_t",), _regs,
                       pmulCode)
# PMULL, PMULL2: same loop, but srcElem1 is cast to BigElement before the
# shift.
# Note: 64-bit PMULL is not available (Crypto. Extension)
pmullCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",),
                  pmullCode, hi=True)
1681 # RADDHN, RADDHN2
1682 raddhnCode = '''
1683 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1684 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1685 (sizeof(Element) * 8);
1686 '''
1687 threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
1688 raddhnCode)
1689 threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
1690 raddhnCode, hi=True)
1691 # RBIT
1692 rbitCode = '''
1693 destElem = 0;
1694 Element temp = srcElem1;
1695 for (int i = 0; i < 8 * sizeof(Element); i++) {
1696 destElem = destElem | ((temp & 0x1) <<
1697 (8 * sizeof(Element) - 1 - i));
1698 temp >>= 1;
1699 }
1700 '''
1701 twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
1702 twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
# REV16
# The three REV snippets copy the element unchanged and instead redirect
# the store: they assign j, which twoEqualRegInstX declares as "unsigned
# j = i" and uses as the destination index.  groupSize is the number of
# elements in one group of (1 << n) bytes; XOR-ing the index with
# groupSize - 1 mirrors the element position inside its group.
rev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
                 rev16Code)
twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
                 rev16Code)
# REV32
# Groups of (1 << 2) = 4 bytes.
rev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 2, rev32Code)
twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
                 4, rev32Code)
# REV64
# Groups of (1 << 3) = 8 bytes.
rev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
                 rev64Code)
twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
                 rev64Code)
# RSHRN, RSHRN2
# Rounding shift right.  rBit is the last bit shifted out and is added
# back as the rounding increment.  The shift is done in two steps
# ((imm - 1) and then 1) so that no single shift uses the full imm.  The
# first test uses ">" rather than ">=", so imm equal to the source width
# still takes the rounding branch.
rshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True)
twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
                  rshrnCode, hasImm=True, hi=True)
# RSUBHN, RSUBHN2
# Difference of the two wide sources plus a rounding constant (1 shifted
# to the top bit position of Element), then shifted right by the Element
# width.  Registered for smallTypes, whereas RADDHN uses
# smallUnsignedTypes.
rsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
                    rsubhnCode)
threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
                    rsubhnCode, hi=True)
# SABA
# Accumulates the absolute difference: the larger operand is always the
# minuend.  The positional True is threeEqualRegInstX's readDest, which
# the "+=" needs.
abaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
                   abaCode, True)
threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
                   abaCode, True)
# SABAL, SABAL2
# Same as SABA with both operands widened to BigElement before the
# subtraction; the positional True is threeRegLongInstX's readDest.
abalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True)
threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
                  abalCode, True, hi=True)
# SABD
# Absolute difference without accumulation.
abdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
                   abdCode)
threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
                   abdCode)
# SABDL, SABDL2
# Widening absolute difference.  readDest is passed as True here even
# though abdlCode overwrites destElem with "=".
abdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True)
threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
                  abdlCode, True, hi=True)
# SADALP
# Adds the widened sum of two source elements to the destination; the
# positional True is twoRegCondenseInstX's readDest.
adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
                    adalpCode, True)
twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
                    adalpCode, True)
# SADDL, SADDL2
# addlwCode is shared by SADDL, SADDLP and SADDW below.
addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SADDLP
twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
                    addlwCode)
twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
                    addlwCode)
# SADDLV
# Note: SimdAddOp can be a bit optimistic here
# Running sum over all source elements into a BigElement accumulator
# (long=True).  The int32_t variant is a separate class that also sets
# doubleDest=True.
addAcrossLongCode = "destElem += (BigElement)srcElem1;"
twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
                  2, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
                  4, addAcrossLongCode, long=True)
twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
                  addAcrossLongCode, doubleDest=True, long=True)
# SADDW, SADDW2
threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
                  addlwCode)
threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
                  addlwCode, hi=True)
# SCVTF (fixed-point)
# Two-stage formatting: fpOp (not defined in this section) is filled with
# the fplibFixedToFP call first, which leaves the "%d" of "int%d_t" in the
# result; each registration below then supplies 32 or 64 for it.  imm is
# passed as the second argument of fplibFixedToFP.
scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
                         " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfFixedCode % 32, hasImm=True)
twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True)
# Scalar variants: scalar=True makes the helper compute element 0 only.
twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
                 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfFixedCode % 64, hasImm=True, scalar=True)
# SCVTF (integer)
# Same call as the fixed-point form, with a literal 0 in place of imm.
scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
                       " false, FPCRRounding(fpscr), fpscr)")
twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32)
twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64)
twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
                 scvtfIntCode % 32, scalar=True)
twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
                 scvtfIntCode % 64, scalar=True)
# SHADD
# Halving add: each operand is halved separately (low bit masked off
# first) and the carry produced by the two low bits is added back.
haddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
                   haddCode)
threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
                   haddCode)
# SHL
# When imm is at least the element width, the shift is split into
# (width - 1) followed by 1 so that no single shift uses the full width.
shlCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    else
        destElem = srcElem1 << imm;
'''
twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
                 hasImm=True)
twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
                 hasImm=True)
# SHLL, SHLL2
# Widens the element and shifts it left by the source element width.
# Both classes are registered under the mnemonic "shll".
shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
                hi=True)
# SHRN, SHRN2
# Plain (non-rounding) narrowing shift; a shift of the full source width
# or more yields 0.
shrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True)
twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
                  shrnCode, hasImm=True, hi=True)
# SHSUB
# Halving subtract, mirroring SHADD: the borrow produced by the two low
# bits is subtracted from the difference of the halved operands.
hsubCode = '''
    Element borrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
                   hsubCode)
threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
                   hsubCode)
# SLI
# Shift left and insert: the shifted source is OR-ed with the bits of the
# old destination selected by mask(imm).  The positional True is
# twoEqualRegInstX's readDest.  For imm >= element width the destination
# is left as it was.
sliCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
                 True, hasImm=True)
twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
                 True, hasImm=True)
# SMAX
# maxCode is shared by SMAX and, with pairwise=True, by SMAXP.
maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode)
threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode)
# SMAXP
threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
                   maxCode, pairwise=True)
threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
                   maxCode, pairwise=True)
# SMAXV
# Running maximum across the vector: destElem is replaced for the first
# element and whenever a larger element is seen.  The rCount-2 class is
# registered for int8_t and int16_t only.
maxAcrossCode = '''
    if (i == 0 || srcElem1 > destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, maxAcrossCode)
twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
                  maxAcrossCode)
# SMIN
# minCode is shared by SMIN and, with pairwise=True, by SMINP.
minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode)
threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode)
# SMINP
threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
                   minCode, pairwise=True)
threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
                   minCode, pairwise=True)
# SMINV
# Running minimum across the vector, the mirror image of SMAXV.
minAcrossCode = '''
    if (i == 0 || srcElem1 < destElem)
        destElem = srcElem1;
'''
twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
                  2, minAcrossCode)
twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
                  minAcrossCode)
1964
# split() is not defined in this section; presumably it tells the ISA
# parser to start a new 'exec' output chunk here -- TODO confirm.
split('exec')

# SMLAL, SMLAL2 (by element)
# Widening multiply-accumulate.  The positional True in the calls below is
# threeRegLongInstX's readDest, which the "+=" needs.  All four SMLAL
# classes are registered under the mnemonic "smlal".
mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True)
threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
                  ("int16_t", "int32_t"), mlalCode, True, byElem=True,
                  hi=True)
# SMLAL, SMLAL2 (vector)
threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True)
threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
                  mlalCode, True, hi=True)
# SMLSL, SMLSL2 (by element)
# Widening multiply-subtract.  Unlike SMLAL, the by-element classes here
# are registered for smallSignedTypes.
mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, byElem=True)
threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
                  smallSignedTypes, mlslCode, True, byElem=True, hi=True)
# SMLSL, SMLSL2 (vector)
threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True)
threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
                  mlslCode, True, hi=True)
# SMOV
# insToGprInstX is not defined in this section; the 'W'/'X' argument and
# the trailing True are presumably the GPR width and a signed flag --
# TODO confirm.
insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
              'W', True)
insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
              True)
# SMULL, SMULL2 (by element)
# Widening multiply without accumulation, so readDest keeps its default.
mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True)
threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
                  mullCode, byElem=True, hi=True)
# SMULL, SMULL2 (vector)
threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
                  mullCode)
threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
                  mullCode, hi=True)
2006 # SQABS
2007 sqabsCode = '''
2008 FPSCR fpscr = (FPSCR) FpscrQc;
1// -*- mode: c++ -*-
2
3// Copyright (c) 2012-2013, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Giacomo Gabrielli
39// Mbou Eyole
40
41let {{
42
# Accumulators that every generator function below appends to through
# "global header_output, exec_output".
header_output = ""
exec_output = ""
# Declared global by threeEqualRegInstX; nothing in this section writes
# to it.
decoders = { 'Generic' : {} }

# FP types (FP operations always work with unsigned representations)
floatTypes = ("uint32_t", "uint64_t")
smallFloatTypes = ("uint32_t",)
50
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
                       readDest=False, pairwise=False, scalar=False,
                       byElem=False, decoder='Generic'):
    """Generate a SIMD instruction with two source registers and one
    destination register that all use the same element type.

    name, Name -- mnemonic and class name handed to InstObjParams; Name is
                  also the class_name of every NeonXExecDeclare emitted.
    opClass    -- value stored under "op_class".
    types      -- element types; one NeonXExecDeclare is emitted per entry.
    rCount     -- how many AA64FpOp*P<n> / AA64FpDestP<n> pieces are read
                  and written; destination pieces rCount..3 are set to 0.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- preload destReg, and destElem, with the old destination.
    pairwise   -- walk adjacent element pairs of srcReg1, then of srcReg2.
    scalar     -- compute element 0 only and zero every other element.
    byElem     -- read all four pieces of the second source and index it
                  with imm; the instruction derives from DataX2RegImmOp.
    decoder    -- not used by this function.
    """
    global header_output, exec_output, decoders
    # A pairwise walk can be neither by-element nor scalar.
    assert not (pairwise and (byElem or scalar))

    # With byElem the 2nd register operand has to be read fully.
    pieces = [simd64EnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''', '''
    %sRegVect srcReg2;
    ''' % ("Full" if byElem else "")]

    for idx in range(rCount):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % {"reg": idx})
        if readDest:
            pieces.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
    if byElem:
        # Remaining pieces of the fully-read 2nd operand.
        for idx in range(rCount, 4):
            pieces.append('''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % {"reg": idx})

    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    if pairwise:
        pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDest})
    else:
        if scalar:
            laneGuard = '''
            if (i != 0) {
                destReg.elements[i] = 0;
                continue;
            }
            '''
        else:
            laneGuard = ""
        pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDest,
           "scalarCheck": laneGuard,
           "src2Index": "imm" if byElem else "i"})

    for idx in range(rCount):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})
    # zero upper half (the range is empty once rCount reaches 4)
    for idx in range(rCount, 4):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": idx})

    baseClass = "DataX2RegImmOp" if byElem else "DataX2RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        {"code": "".join(pieces),
                         "r_count": rCount,
                         "op_class": opClass}, [])
    declTemplate = NeonX2RegImmOpDeclare if byElem else NeonX2RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
144
def threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1, bigSrc2, bigDest, readDest, scalar=False,
                         byElem=False, hi=False):
    """Generate a SIMD instruction whose sources and destination may use
    different element widths.

    bigSrc1, bigSrc2, bigDest -- when true the operand is a BigRegVect
                  made of 4 pieces, otherwise a RegVect made of 2 pieces.
    readDest   -- preload destReg, and destElem, with the old destination.
    scalar     -- compute element 0 only and zero every other element;
                  cannot be combined with hi.
    byElem     -- read all four pieces of the second source into a
                  FullRegVect and index it with imm; the instruction
                  derives from DataX2RegImmOp.
    hi         -- a non-big source is read starting at piece 2 instead of
                  piece 0 (not applied to the second source when byElem is
                  set); a non-big destination is written to pieces 2-3 and
                  the upper half is not zeroed.

    name, Name, opClass, types and op have the same role as in
    threeEqualRegInstX.  "r_count" is always 2.
    """
    global header_output, exec_output
    assert not (scalar and hi)

    src1Cnt = 4 if bigSrc1 else 2
    src2Cnt = 4 if bigSrc2 else 2
    destCnt = 4 if bigDest else 2
    src1Prefix = 'Big' if bigSrc1 else ''
    destPrefix = 'Big' if bigDest else ''
    if byElem:
        src2Prefix = 'Full'
    elif bigSrc2:
        src2Prefix = 'Big'
    else:
        src2Prefix = ''

    pieces = [simd64EnabledCheckCode, '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)]

    # long/widening operations start at piece 2 for the "hi" variants
    src1Base = 2 if (hi and not bigSrc1) else 0
    for idx in range(src1Cnt):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw);
    ''' % {"reg": idx, "srcReg1": src1Base + idx})

    src2Base = 2 if (not byElem and hi and not bigSrc2) else 0
    for idx in range(src2Cnt):
        pieces.append('''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw);
    ''' % {"reg": idx, "srcReg2": src2Base + idx})
    if byElem:
        # 2nd operand has to be read fully
        for idx in range(src2Cnt, 4):
            pieces.append('''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % {"reg": idx})

    loadDest = ''
    if readDest:
        for idx in range(destCnt):
            pieces.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
        loadDest = 'destElem = gtoh(destReg.elements[i]);'

    if scalar:
        laneGuard = '''
            if (i != 0) {
                destReg.elements[i] = 0;
                continue;
            }
            '''
    else:
        laneGuard = ""
    # srcElem2 is declared with the same element prefix as srcElem1.
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDest,
           "src1Prefix": src1Prefix,
           "destPrefix": destPrefix,
           "scalarCheck": laneGuard,
           "src2Index": "imm" if byElem else "i"})

    # narrowing operations write the upper pieces for the "hi" variants
    destBase = 2 if (hi and not bigDest) else 0
    for idx in range(destCnt):
        pieces.append('''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx, "destReg": destBase + idx})
    if not hi:
        # zero upper half (the range is empty when destCnt is 4)
        for idx in range(destCnt, 4):
            pieces.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": idx})

    baseClass = "DataX2RegImmOp" if byElem else "DataX2RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        {"code": "".join(pieces),
                         "r_count": 2,
                         "op_class": opClass}, [])
    declTemplate = NeonX2RegImmOpDeclare if byElem else NeonX2RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
247
def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                        scalar=False, byElem=False, hi=False):
    """Narrowing form: both sources are big, the destination is not.

    byElem is accepted for signature parity with the other wrappers but
    must be false.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=True, bigDest=False,
                         readDest=readDest, scalar=scalar, byElem=byElem,
                         hi=hi)
253
def threeRegLongInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Long form: neither source is big, the destination is big."""
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=False, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar, byElem=byElem,
                         hi=hi)
258
def threeRegWideInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, byElem=False, hi=False):
    """Wide form: the first source and the destination are big, the
    second source is not.

    byElem is accepted for signature parity with the other wrappers but
    must be false.
    """
    assert not byElem
    threeUnequalRegInstX(name, Name, opClass, types, op,
                         bigSrc1=True, bigSrc2=False, bigDest=True,
                         readDest=readDest, scalar=scalar, byElem=byElem,
                         hi=hi)
264
def twoEqualRegInstX(name, Name, opClass, types, rCount, op,
                     readDest=False, scalar=False, byElem=False,
                     hasImm=False, isDup=False):
    """Generate a SIMD instruction with one source register and one
    destination register of the same element type.

    rCount   -- how many register pieces are read and written; destination
                pieces rCount..3 are set to 0.
    op       -- C++ snippet computing destElem from srcElem1.  It may also
                assign j, the destination element index, which starts out
                equal to i.
    readDest -- preload destReg, and destElem, with the old destination.
    scalar   -- compute element 0 only and zero every other element.
    byElem   -- index the source with imm instead of i; implies hasImm.
    hasImm   -- derive the instruction from DataX1RegImmOp.
    isDup    -- read all four source pieces into a FullRegVect; requires
                byElem.

    name, Name, opClass and types are forwarded to InstObjParams and the
    NeonX* templates.
    """
    global header_output, exec_output
    assert byElem or not isDup
    # By-element forms always carry an immediate.
    useImm = hasImm or byElem

    if isDup:
        declarations = '''
    FullRegVect srcReg1;
    RegVect destReg;
    '''
        srcPieces = 4
    else:
        declarations = '''
    RegVect srcReg1, destReg;
    '''
        srcPieces = rCount
    pieces = [simd64EnabledCheckCode, declarations]

    for idx in range(srcPieces):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})
        if readDest:
            pieces.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": idx})

    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    if scalar:
        laneGuard = '''
            if (i != 0) {
                destReg.elements[i] = 0;
                continue;
            }
            '''
    else:
        laneGuard = ""
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDest,
           "scalarCheck": laneGuard,
           "src1Index": "imm" if byElem else "i"})

    for idx in range(rCount):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})
    # zero upper half (the range is empty once rCount reaches 4)
    for idx in range(rCount, 4):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": idx})

    baseClass = "DataX1RegImmOp" if useImm else "DataX1RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        {"code": "".join(pieces),
                         "r_count": rCount,
                         "op_class": opClass}, [])
    declTemplate = NeonX1RegImmOpDeclare if useImm else NeonX1RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
334
def twoRegLongInstX(name, Name, opClass, types, op, readDest=False,
                    hi=False, hasImm=False):
    """Generate a SIMD instruction with one source register and a
    destination register of the wider (Big) element type.

    op       -- C++ snippet computing the BigElement destElem from the
                Element srcElem1.
    readDest -- preload destReg with the old destination and seed destElem
                from it before op runs.
    hi       -- read the source from pieces 2-3 instead of pieces 0-1.
    hasImm   -- derive the instruction from DataX1RegImmOp.

    name, Name, opClass and types are forwarded to InstObjParams and the
    NeonX* templates.  All four destination pieces are always written and
    "r_count" is always 2.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    srcBase = 2 if hi else 0
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg)d_uw);
    ''' % {"reg": reg, "srcReg": srcBase + reg}
    readDestCode = ''
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": reg}
        # The old element must land in destElem (the value op works on),
        # as in the other generators; assigning it to destReg would leave
        # destElem uninitialised for accumulating ops.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": readDestCode}
    for reg in range(4):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": reg}
    iop = InstObjParams(name, Name,
                        "DataX1RegImmOp" if hasImm else "DataX1RegOp",
                        {"code": eWalkCode,
                         "r_count": 2,
                         "op_class": opClass}, [])
    if hasImm:
        header_output += NeonX1RegImmOpDeclare.subst(iop)
    else:
        header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
385
def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False,
                      scalar=False, hi=False, hasImm=False):
    """Generate a SIMD instruction with a Big-element source register and a
    destination register of the narrower element type.

    op       -- C++ snippet computing the Element destElem from the
                BigElement srcElem1.
    readDest -- preload destReg pieces 0-1, and destElem, with the old
                destination; otherwise destReg.elements[0] is cleared.
    scalar   -- compute element 0 only and zero every other element.
    hi       -- write the result to destination pieces 2-3 and leave the
                other pieces alone; without it pieces 0-1 are written and
                pieces 2-3 are set to 0.
    hasImm   -- derive the instruction from DataX1RegImmOp.

    name, Name, opClass and types are forwarded to InstObjParams and the
    NeonX* templates.  "r_count" is always 2.
    """
    global header_output, exec_output
    pieces = [simd64EnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(4):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})

    if readDest:
        for idx in range(2):
            pieces.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        pieces.append('''
    destReg.elements[0] = 0;
    ''')
        loadDest = ''

    if scalar:
        laneGuard = '''
            if (i != 0) {
                destReg.elements[i] = 0;
                continue;
            }
            '''
    else:
        laneGuard = ""
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(scalarCheck)s
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDest,
           "scalarCheck": laneGuard})

    destBase = 2 if hi else 0
    for idx in range(2):
        pieces.append('''
    AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx, "destReg": destBase + idx})
    if not hi:
        # zero upper half
        for idx in range(2, 4):
            pieces.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": idx})

    baseClass = "DataX1RegImmOp" if hasImm else "DataX1RegOp"
    iop = InstObjParams(name, Name, baseClass,
                        {"code": "".join(pieces),
                         "r_count": 2,
                         "op_class": opClass}, [])
    declTemplate = NeonX1RegImmOpDeclare if hasImm else NeonX1RegOpDeclare
    header_output += declTemplate.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
451
def threeRegScrambleInstX(name, Name, opClass, types, rCount, op):
    """Generate a two-source SIMD instruction whose whole element walk is
    supplied by the caller.

    op is inserted verbatim between the code that loads srcReg1/srcReg2 and
    the code that stores destReg, so it has to fill destReg itself.
    rCount pieces are read and written; destination pieces rCount..3 are
    set to 0.  name, Name, opClass and types are forwarded to InstObjParams
    and the NeonX* templates.
    """
    global header_output, exec_output
    pieces = [simd64EnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    ''' % {"reg": idx})
    pieces.append(op)
    for idx in range(rCount):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})
    # Clear the destination pieces that were not written (none when
    # rCount is 4).
    for idx in range(rCount, 4):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": idx})

    iop = InstObjParams(name, Name, "DataX2RegOp",
                        {"code": "".join(pieces),
                         "r_count": rCount,
                         "op_class": opClass}, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
483
def insFromVecElemInstX(name, Name, opClass, types, rCount):
    """Generate an instruction that copies one vector element into one
    element of the destination.

    All four source pieces are read; element imm2 of the source is stored
    into element imm1 of the destination.  The first rCount destination
    pieces are read before and written back after the copy, so the other
    elements in them keep their value.  name, Name, opClass and types are
    forwarded to InstObjParams and the NeonX* templates.
    """
    global header_output, exec_output
    pieces = [simd64EnabledCheckCode, '''
    FullRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(4):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})
    for idx in range(rCount):
        pieces.append('''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
    pieces.append('''
    Element srcElem1 = gtoh(srcReg1.elements[imm2]);
    Element destElem = srcElem1;
    destReg.elements[imm1] = htog(destElem);
    ''')
    for idx in range(rCount):
        pieces.append('''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    iop = InstObjParams(name, Name, "DataX1Reg2ImmOp",
                        {"code": "".join(pieces),
                         "r_count": rCount,
                         "op_class": opClass}, [])
    header_output += NeonX1Reg2ImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
518
def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op):
    """Generate a scalar pairwise SIMD instruction.

    op is a C++ snippet that combines elements 0 and 1 of the source
    (srcElem1, srcElem2) into destElem, which is stored in element 0 of
    the destination.  rCount source pieces are read; the result occupies
    rCount // 2 destination pieces and the remaining pieces are set to 0.
    name, Name, opClass and types are forwarded to InstObjParams and the
    NeonX* templates.
    """
    global header_output, exec_output
    eWalkCode = simd64EnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    ''' % {"reg": reg}
    eWalkCode += '''
    Element srcElem1 = gtoh(srcReg1.elements[0]);
    Element srcElem2 = gtoh(srcReg1.elements[1]);
    Element destElem;
    %(op)s
    destReg.elements[0] = htog(destElem);
    ''' % {"op": op}
    # Floor division keeps destCnt an int on Python 3 as well; a float
    # here would make range() raise TypeError.
    destCnt = rCount // 2
    for reg in range(destCnt):
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": reg}
    for reg in range(destCnt, 4):  # zero upper half
        eWalkCode += '''
    AA64FpDestP%(reg)d_uw = 0;
    ''' % {"reg": reg}
    iop = InstObjParams(name, Name,
                        "DataX1RegOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "op_class": opClass}, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({"targs": elemType,
                                               "class_name": Name})
555
def twoRegAcrossInstX(name, Name, opClass, types, rCount, op,
                      doubleDest=False, long=False):
    """Emit an across-lanes reduction instruction built on DataX1RegOp.

    The generated C++ walks all eCount source elements: the first one
    seeds destElem and `op` is run for each later element (with the
    current element in srcElem1).  The result lands in destination
    element 0; destination chunks past the result are written as zero.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of source register chunks to read
    op         -- C++ snippet folding srcElem1 into destElem
    doubleDest -- write back two destination chunks instead of one
    long       -- use BigRegVect/BigElement for the destination and the
                  NeonXUnequalRegOpExecute template
    """
    global header_output, exec_output
    if long:
        destPrefix = "Big"
        execTemplate = NeonXUnequalRegOpExecute
    else:
        destPrefix = ""
        execTemplate = NeonXEqualRegOpExecute
    resultChunks = 2 if doubleDest else 1

    srcLoad = '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    reduction = '''
    destReg.regs[0] = 0;
    %(destPrefix)sElement destElem = 0;
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        if (i == 0) {
            destElem = srcElem1;
        } else {
            %(op)s
        }
    }
    destReg.elements[0] = htog(destElem);
    '''

    # Only the declaration literal is formatted; the enabled-check code
    # is prepended untouched.
    declarations = '''
    RegVect srcReg1;
    %sRegVect destReg;
    ''' % destPrefix
    pieces = [simd64EnabledCheckCode + declarations]
    pieces += [srcLoad % { "reg" : idx } for idx in range(rCount)]
    pieces.append(reduction % { "op" : op, "destPrefix" : destPrefix })
    pieces += [destStore % { "reg" : idx } for idx in range(resultChunks)]
    # zero upper half
    pieces += [destZero % { "reg" : idx } for idx in range(resultChunks, 4)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += execTemplate.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
604
def twoRegCondenseInstX(name, Name, opClass, types, rCount, op,
                        readDest=False):
    """Emit an instruction that condenses adjacent element pairs.

    For each i below eCount / 2 the generated C++ passes source elements
    2*i and 2*i+1 to `op` as srcElem1/srcElem2 and stores the BigElement
    destElem it assigns into destination element i.  rCount destination
    chunks are written back and any chunks up to index 3 are zeroed.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of register chunks read and written
    op         -- C++ snippet computing destElem
    readDest   -- also load the current destination and preset destElem
                  from it before `op` runs
    """
    global header_output, exec_output
    srcLoad = '''
    srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    '''
    destLoad = '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    pairLoop = '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    pieces = [simd64EnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    # Source and (optional) destination loads are interleaved per chunk.
    for idx in range(rCount):
        pieces.append(srcLoad % { "reg" : idx })
        if readDest:
            pieces.append(destLoad % { "reg" : idx })
    pieces.append(pairLoop % { "op" : op, "readDest" : readDestCode })
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    # zero upper half (empty range once rCount reaches 4)
    pieces += [destZero % { "reg" : idx } for idx in range(rCount, 4)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXUnequalRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
653
def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an immediate-only instruction built on DataXImmOnlyOp.

    The generated C++ runs `op` once per destination element; `op` must
    leave its result in destElem.  rCount destination chunks are written
    back and any chunks up to index 3 are zeroed.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of destination register chunks
    op         -- C++ snippet computing destElem
    readDest   -- load the current destination first and preset destElem
                  from it before `op` runs
    """
    global header_output, exec_output
    destLoad = '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    elementLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''

    pieces = [simd64EnabledCheckCode + '''
    RegVect destReg;
    ''']
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        pieces += [destLoad % { "reg" : idx } for idx in range(rCount)]
    else:
        readDestCode = ''
    pieces.append(elementLoop % { "op" : op, "readDest" : readDestCode })
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    # zero upper half (empty range once rCount reaches 4)
    pieces += [destZero % { "reg" : idx } for idx in range(rCount, 4)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataXImmOnlyOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOnlyOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
695
def dupGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Emit an instruction that copies one operand into every element.

    The generated C++ casts the operand named "<gprSpec>Op1" to Element
    and stores it in all eCount destination elements.  rCount destination
    chunks are written back and any chunks up to index 3 are zeroed.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of destination register chunks
    gprSpec    -- text prefixed to "Op1" to form the source operand name
    """
    global header_output, exec_output
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''
    # Only this literal is formatted with gprSpec; the enabled-check
    # code is prepended untouched.
    broadcast = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element) %sOp1);
    }
    ''' % gprSpec

    pieces = [simd64EnabledCheckCode + broadcast]
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    # zero upper half (empty range once rCount reaches 4)
    pieces += [destZero % { "reg" : idx } for idx in range(rCount, 4)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX1RegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
724
def extInstX(name, Name, opClass, types, rCount, op):
    """Emit a two-source instruction with an immediate (DataX2RegImmOp).

    The generated C++ loads rCount chunks of both source operands into
    srcReg1 and srcReg2, then runs `op` verbatim; `op` is responsible
    for filling destReg itself.  rCount destination chunks are written
    back and any chunks up to index 3 are zeroed.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of register chunks per operand
    op         -- complete C++ body operating on srcReg1/srcReg2/destReg
    """
    global header_output, exec_output
    srcLoads = '''
    srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''

    pieces = [simd64EnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    pieces += [srcLoads % { "reg" : idx } for idx in range(rCount)]
    pieces.append(op)
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    # zero upper half (empty range once rCount reaches 4)
    pieces += [destZero % { "reg" : idx } for idx in range(rCount, 4)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX2RegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
756
def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec):
    """Emit an instruction that inserts one operand into element `imm`.

    The generated C++ loads the current destination, overwrites element
    imm with the operand named "<gprSpec>Op1" cast to Element, and writes
    the rCount destination chunks back.  No chunks are zeroed here.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- number of destination register chunks
    gprSpec    -- text prefixed to "Op1" to form the source operand name
    """
    global header_output, exec_output
    destLoad = '''
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    insertElem = '''
    destReg.elements[imm] = htog((Element) %sOp1);
    ''' % gprSpec

    pieces = [simd64EnabledCheckCode + '''
    RegVect destReg;
    ''']
    pieces += [destLoad % { "reg" : idx } for idx in range(rCount)]
    pieces.append(insertElem)
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX1RegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
784
def insToGprInstX(name, Name, opClass, types, rCount, gprSpec,
                  signExt=False):
    """Emit an instruction that copies element `imm` into "<gprSpec>Dest".

    The generated C++ always loads four source chunks into a FullRegVect
    (independent of rCount, which is only forwarded as "r_count") and
    assigns source element imm to the destination operand.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    rCount     -- forwarded to InstObjParams as "r_count"
    gprSpec    -- text prefixed to "Dest" to form the destination operand
    signExt    -- wrap the element in sext<sizeof(Element) * 8>()
    """
    global header_output, exec_output
    srcLoad = '''
    srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw);
    '''
    # NOTE(review): the element is read without gtoh(), unlike the
    # element reads in the neighbouring generators -- confirm intended.
    if signExt:
        extract = '''
    %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]);
    ''' % gprSpec
    else:
        extract = '''
    %sDest = srcReg.elements[imm];
    ''' % gprSpec

    pieces = [simd64EnabledCheckCode + '''
    FullRegVect srcReg;
    ''']
    pieces += [srcLoad % { "reg" : idx } for idx in range(4)]
    pieces.append(extract)
    eWalkCode = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX1RegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX1RegImmOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
814
def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount):
    """Emit a byte table-lookup instruction built on DataX2RegOp.

    The generated C++ fills a 64-byte table from the first operand (the
    first length * 4 of its 16 register slots; the rest are zero), then
    for every destination byte uses the matching byte of the second
    operand as an index.  Indices below 16 * length select a table byte;
    for larger indices the destination byte is zeroed when isTbl is true
    and left unchanged otherwise.  rCount destination chunks are written
    back and any chunks up to index 3 are zeroed.

    name, Name -- mnemonic and class name handed to InstObjParams
    opClass    -- value substituted as "op_class"
    types      -- element types; one NeonXExecDeclare substitution each
    length     -- integer formatted into the C++ `length` constant
    isTbl      -- text formatted into the C++ `isTbl` initialiser
    rCount     -- number of index/destination register chunks
    """
    global header_output, decoder_output, exec_output
    indexAndDestLoad = '''
    srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw);
    '''
    tableLoad = '''
    table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw);
    '''
    tableClear = '''
    table.regs[%(reg)d] = 0;
    '''
    destStore = '''
    AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    destZero = '''
    AA64FpDestP%(reg)d_uw = 0;
    '''

    pieces = [simd64EnabledCheckCode + '''
    union
    {
        uint8_t bytes[64];
        FloatRegBits regs[16];
    } table;

    union
    {
        uint8_t bytes[%(rCount)d * 4];
        FloatRegBits regs[%(rCount)d];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isTbl = %(isTbl)s;
    ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl }]
    pieces += [indexAndDestLoad % { "reg" : idx } for idx in range(rCount)]

    usedSlots = length * 4
    for slot in range(16):
        if slot < usedSlots:
            # slot = 4 * v + p selects part p of table register v
            vecIdx, partIdx = divmod(slot, 4)
            pieces.append(tableLoad % { "reg" : slot,
                                        "p" : partIdx,
                                        "v" : vecIdx })
        else:
            pieces.append(tableClear % { "reg" : slot })

    pieces.append('''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 16 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isTbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }
    ''')
    pieces += [destStore % { "reg" : idx } for idx in range(rCount)]
    # zero upper half (empty range once rCount reaches 4)
    pieces += [destZero % { "reg" : idx } for idx in range(rCount, 4)]
    code = "".join(pieces)

    iop = InstObjParams(name, Name, "DataX2RegOp",
                        { "code": code,
                          "r_count": rCount,
                          "op_class": opClass }, [])
    header_output += NeonX2RegOpDeclare.subst(iop)
    exec_output += NeonXEqualRegOpExecute.subst(iop)
    for elemType in types:
        exec_output += NeonXExecDeclare.subst({ "targs" : elemType,
                                                "class_name" : Name })
879
# Integer arithmetic and bitwise instruction definitions.  The "DX" and
# "QX" classes of each instruction share one code snippet and differ in
# the register count (2 vs. 4) and, for some, in the element type list.

# ABS
absCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''
for _cls, _regs in (("AbsDX", 2), ("AbsQX", 4)):
    twoEqualRegInstX("abs", _cls, "SimdAluOp", signedTypes, _regs, absCode)

# ADD
addCode = "destElem = srcElem1 + srcElem2;"
for _cls, _regs in (("AddDX", 2), ("AddQX", 4)):
    threeEqualRegInstX("add", _cls, "SimdAddOp", unsignedTypes, _regs,
                       addCode)

# ADDHN, ADDHN2
addhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
for _mnem, _cls, _extra in (("addhn", "AddhnX", {}),
                            ("addhn2", "Addhn2X", {"hi": True})):
    threeRegNarrowInstX(_mnem, _cls, "SimdAddOp", smallUnsignedTypes,
                        addhnCode, **_extra)

# ADDP (scalar)
twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4,
                      addCode)

# ADDP (vector)
for _cls, _tys, _regs in (("AddpDX", smallUnsignedTypes, 2),
                          ("AddpQX", unsignedTypes, 4)):
    threeEqualRegInstX("addp", _cls, "SimdAddOp", _tys, _regs, addCode,
                       pairwise=True)

# ADDV
# Note: SimdAddOp can be a bit optimistic here
addAcrossCode = "destElem += srcElem1;"
for _cls, _tys, _regs in (("AddvDX", ("uint8_t", "uint16_t"), 2),
                          ("AddvQX", smallUnsignedTypes, 4)):
    twoRegAcrossInstX("addv", _cls, "SimdAddOp", _tys, _regs, addAcrossCode)

# AND
andCode = "destElem = srcElem1 & srcElem2;"
for _cls, _regs in (("AndDX", 2), ("AndQX", 4)):
    threeEqualRegInstX("and", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       andCode)

# BIC (immediate)
bicImmCode = "destElem &= ~imm;"
for _cls, _regs in (("BicImmDX", 2), ("BicImmQX", 4)):
    oneRegImmInstX("bic", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   bicImmCode, True)

# BIC (register)
bicCode = "destElem = srcElem1 & ~srcElem2;"
for _cls, _regs in (("BicDX", 2), ("BicQX", 4)):
    threeEqualRegInstX("bic", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       bicCode)

# BIF, BIT, BSL: each snippet also reads destElem, hence the trailing
# positional True on every call.
bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);"
bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);"
bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);"
for _mnem, _stem, _code in (("bif", "Bif", bifCode),
                            ("bit", "Bit", bitCode),
                            ("bsl", "Bsl", bslCode)):
    for _sfx, _regs in (("DX", 2), ("QX", 4)):
        threeEqualRegInstX(_mnem, _stem + _sfx, "SimdAluOp", ("uint64_t",),
                           _regs, _code, True)
# CLS: count how many bits below the top bit repeat the top (sign) bit,
# capped at the element width minus one.
clsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''

# CLZ: shift left until the (signed) element turns negative, counting the
# shifts, capped at the element width.
clzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''

for _mnem, _stem, _code in (("cls", "Cls", clsCode),
                            ("clz", "Clz", clzCode)):
    for _sfx, _regs in (("DX", 2), ("QX", 4)):
        twoEqualRegInstX(_mnem, _stem + _sfx, "SimdAluOp", smallSignedTypes,
                         _regs, _code)
# Integer compares.  Every snippet sets destElem to all ones when the
# condition on srcElem1 holds and to zero otherwise, so they are built
# from one template.
_cmpTemplate = "destElem = (srcElem1 %s) ? (Element)(-1) : 0;"

cmeqCode = _cmpTemplate % "== srcElem2"
cmeqZeroCode = _cmpTemplate % "== 0"
cmgeCode = _cmpTemplate % ">= srcElem2"
cmgeZeroCode = _cmpTemplate % ">= 0"
cmgtCode = _cmpTemplate % "> srcElem2"
cmgtZeroCode = _cmpTemplate % "> 0"
cmleZeroCode = _cmpTemplate % "<= 0"
cmltZeroCode = _cmpTemplate % "< 0"
tstCode = _cmpTemplate % "& srcElem2"

# (generator, mnemonic, class stem, op class, element types, snippet),
# listed in emission order.  CMHI/CMHS reuse the CMGT/CMGE snippets with
# unsigned element types.
for _gen, _mnem, _stem, _opClass, _tys, _code in (
        # CMEQ (register)
        (threeEqualRegInstX, "cmeq", "Cmeq", "SimdCmpOp",
         unsignedTypes, cmeqCode),
        # CMEQ (zero)
        (twoEqualRegInstX, "cmeq", "CmeqZero", "SimdCmpOp",
         signedTypes, cmeqZeroCode),
        # CMGE (register)
        (threeEqualRegInstX, "cmge", "Cmge", "SimdCmpOp",
         signedTypes, cmgeCode),
        # CMGE (zero)
        (twoEqualRegInstX, "cmge", "CmgeZero", "SimdCmpOp",
         signedTypes, cmgeZeroCode),
        # CMGT (register)
        (threeEqualRegInstX, "cmgt", "Cmgt", "SimdCmpOp",
         signedTypes, cmgtCode),
        # CMGT (zero)
        (twoEqualRegInstX, "cmgt", "CmgtZero", "SimdCmpOp",
         signedTypes, cmgtZeroCode),
        # CMHI (register)
        (threeEqualRegInstX, "cmhi", "Cmhi", "SimdCmpOp",
         unsignedTypes, cmgtCode),
        # CMHS (register)
        (threeEqualRegInstX, "cmhs", "Cmhs", "SimdCmpOp",
         unsignedTypes, cmgeCode),
        # CMLE (zero)
        (twoEqualRegInstX, "cmle", "CmleZero", "SimdCmpOp",
         signedTypes, cmleZeroCode),
        # CMLT (zero)
        (twoEqualRegInstX, "cmlt", "CmltZero", "SimdCmpOp",
         signedTypes, cmltZeroCode),
        # CMTST (register)
        (threeEqualRegInstX, "cmtst", "Cmtst", "SimdAluOp",
         unsignedTypes, tstCode)):
    for _sfx, _regs in (("DX", 2), ("QX", 4)):
        _gen(_mnem, _stem + _sfx, _opClass, _tys, _regs, _code)
# CNT: add up the set bits of each element.
cntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''
for _cls, _regs in (("CntDX", 2), ("CntQX", 4)):
    twoEqualRegInstX("cnt", _cls, "SimdAluOp", ("uint8_t",), _regs, cntCode)

# DUP (element)
dupCode = "destElem = srcElem1;"
for _cls, _tys, _regs, _extra in (
        ("DupElemDX", smallUnsignedTypes, 2, {}),
        ("DupElemQX", unsignedTypes, 4, {}),
        ("DupElemScX", unsignedTypes, 4, {"scalar": True})):
    twoEqualRegInstX("dup", _cls, "SimdMiscOp", _tys, _regs, dupCode,
                     isDup=True, byElem=True, **_extra)

# DUP (general register)
for _cls, _tys, _regs, _gpr in (
        ("DupGprWDX", smallUnsignedTypes, 2, 'W'),
        ("DupGprWQX", smallUnsignedTypes, 4, 'W'),
        ("DupGprXQX", ("uint64_t",), 4, 'X')):
    dupGprInstX("dup", _cls, "SimdMiscOp", _tys, _regs, _gpr)

# EOR
eorCode = "destElem = srcElem1 ^ srcElem2;"
for _cls, _regs in (("EorDX", 2), ("EorQX", 4)):
    threeEqualRegInstX("eor", _cls, "SimdAluOp", ("uint64_t",), _regs,
                       eorCode)
# EXT
# Destination element i takes element (i + imm) of srcReg1 while that
# index is inside the register, and otherwise element (i + imm - eCount)
# of srcReg2.  An index beyond both registers sets an
# UndefinedInstruction fault instead of writing the element.
extCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = std::make_shared<UndefinedInstruction>(
                            machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
# The mnemonic is lower-case like the neighbouring definitions ("dup",
# "eor", "fabd", ...); it was previously spelled "Ext".
extInstX("ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode)
extInstX("ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode)
# Wrapper shared by the floating-point snippets: copy FpscrExc into a
# local fpscr, evaluate the substituted expression into destElem, then
# write fpscr back to FpscrExc.
fpOp = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destElem = %s;
    FpscrExc = fpscr;
'''

# FABD
fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))"
for _cls, _tys, _extra in (("FabdDX", smallFloatTypes, {}),
                           ("FabdQX", floatTypes, {}),
                           ("FabdScX", floatTypes, {"scalar": True})):
    # Only the DX class uses register count 2; QX and ScX use 4.
    _regs = 2 if _cls == "FabdDX" else 4
    threeEqualRegInstX("fabd", _cls, "SimdFloatAddOp", _tys, _regs, fabdCode,
                       **_extra)
# FABS
fabsCode = fpOp % "fplibAbs<Element>(srcElem1)"
# The mnemonic is "fabs": the previous "Abs" dropped the "f" and was
# capitalised, unlike the neighbouring floating-point definitions
# ("fabd", "facge", "fadd", ...).
twoEqualRegInstX("fabs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2,
                 fabsCode)
twoEqualRegInstX("fabs", "FabsQX", "SimdFloatAluOp", floatTypes, 4,
                 fabsCode)
# FACGE, FACGT: compare the absolute values of both sources; destElem
# becomes -1 when the comparison holds and 0 otherwise.
fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>("
                     "fplibAbs<Element>(srcElem1), "
                     "fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0")
facgeCode = fpCmpAbsOp % "GE"
facgtCode = fpCmpAbsOp % "GT"
for _mnem, _stem, _code in (("facge", "Facge", facgeCode),
                            ("facgt", "Facgt", facgtCode)):
    threeEqualRegInstX(_mnem, _stem + "DX", "SimdFloatCmpOp",
                       smallFloatTypes, 2, _code)
    threeEqualRegInstX(_mnem, _stem + "QX", "SimdFloatCmpOp", floatTypes, 4,
                       _code)
    threeEqualRegInstX(_mnem, _stem + "ScX", "SimdFloatCmpOp", floatTypes, 4,
                       _code, scalar=True)

# FADD
fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)"
faddCode = fpBinOp % "Add"
for _sfx, _tys, _regs in (("DX", smallFloatTypes, 2),
                          ("QX", floatTypes, 4)):
    threeEqualRegInstX("fadd", "Fadd" + _sfx, "SimdFloatAddOp", _tys, _regs,
                       faddCode)

# FADDP (scalar)
for _sfx, _tys, _regs in (("ScDX", ("uint32_t",), 2),
                          ("ScQX", ("uint64_t",), 4)):
    twoRegPairwiseScInstX("faddp", "Faddp" + _sfx, "SimdFloatAddOp", _tys,
                          _regs, faddCode)

# FADDP (vector)
for _sfx, _tys, _regs in (("DX", smallFloatTypes, 2),
                          ("QX", floatTypes, 4)):
    threeEqualRegInstX("faddp", "Faddp" + _sfx, "SimdFloatAddOp", _tys,
                       _regs, faddCode, pairwise=True)
# Floating-point compares.  Each snippet yields -1 when the fplib
# comparison holds and 0 otherwise.  Three operand layouts are used:
#   fpCmpOp        -- srcElem1 against srcElem2
#   fpCmpZeroOp    -- srcElem1 against 0
#   fpCmpRevZeroOp -- 0 against srcElem1 (used for FCMLE/FCMLT)
fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr)"
                  " ? -1 : 0")
fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0"
fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr)"
                         " ? -1 : 0")

fcmeqCode = fpCmpOp % "EQ"
fcmeqZeroCode = fpCmpZeroOp % "EQ"
fcmgeCode = fpCmpOp % "GE"
fcmgeZeroCode = fpCmpZeroOp % "GE"
fcmgtCode = fpCmpOp % "GT"
fcmgtZeroCode = fpCmpZeroOp % "GT"
fcmleZeroCode = fpCmpRevZeroOp % "GE"
fcmltZeroCode = fpCmpRevZeroOp % "GT"

# (generator, mnemonic, class stem, snippet) in emission order; every
# entry expands to a DX, a QX and a scalar ScX class.
for _gen, _mnem, _stem, _code in (
        # FCMEQ (register)
        (threeEqualRegInstX, "fcmeq", "Fcmeq", fcmeqCode),
        # FCMEQ (zero)
        (twoEqualRegInstX, "fcmeq", "FcmeqZero", fcmeqZeroCode),
        # FCMGE (register)
        (threeEqualRegInstX, "fcmge", "Fcmge", fcmgeCode),
        # FCMGE (zero)
        (twoEqualRegInstX, "fcmge", "FcmgeZero", fcmgeZeroCode),
        # FCMGT (register)
        (threeEqualRegInstX, "fcmgt", "Fcmgt", fcmgtCode),
        # FCMGT (zero)
        (twoEqualRegInstX, "fcmgt", "FcmgtZero", fcmgtZeroCode),
        # FCMLE (zero)
        (twoEqualRegInstX, "fcmle", "FcmleZero", fcmleZeroCode),
        # FCMLT (zero)
        (twoEqualRegInstX, "fcmlt", "FcmltZero", fcmltZeroCode)):
    _gen(_mnem, _stem + "DX", "SimdFloatCmpOp", smallFloatTypes, 2, _code)
    _gen(_mnem, _stem + "QX", "SimdFloatCmpOp", floatTypes, 4, _code)
    _gen(_mnem, _stem + "ScX", "SimdFloatCmpOp", floatTypes, 4, _code,
         scalar=True)
# Floating-point conversions.

def _fpToIntInstX(mnem, stem, code, **extra):
    """Define the DX, QX and scalar ScX classes of one FP-to-integer
    conversion; `extra` is forwarded to all three generator calls."""
    twoEqualRegInstX(mnem, stem + "DX", "SimdCvtOp", smallFloatTypes, 2,
                     code, **extra)
    twoEqualRegInstX(mnem, stem + "QX", "SimdCvtOp", floatTypes, 4,
                     code, **extra)
    twoEqualRegInstX(mnem, stem + "ScX", "SimdCvtOp", floatTypes, 4,
                     code, scalar=True, **extra)

# The three placeholders are filled, in order, with the second, third and
# fourth arguments of fplibFPToFixed; the calls below pass "0" or "imm",
# "false" or "true" (false for the FCVT*S forms, true for FCVT*U), and an
# FPRounding_* constant.
fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>("
                   "srcElem1, %s, %s, %s, fpscr)")

# FCVTAS
fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY")
_fpToIntInstX("fcvtas", "Fcvtas", fcvtasCode)

# FCVTAU
fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY")
_fpToIntInstX("fcvtau", "Fcvtau", fcvtauCode)

# FCVTL, FCVTL2
fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
for _cls, _extra in (("FcvtlX", {}), ("Fcvtl2X", {"hi": True})):
    twoRegLongInstX("fcvtl", _cls, "SimdCvtOp", ("uint16_t", "uint32_t"),
                    fcvtlCode, **_extra)

# FCVTMS
fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF")
_fpToIntInstX("fcvtms", "Fcvtms", fcvtmsCode)

# FCVTMU
fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF")
_fpToIntInstX("fcvtmu", "Fcvtmu", fcvtmuCode)

# FCVTN, FCVTN2
fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>("
                    "srcElem1, FPCRRounding(fpscr), fpscr)")
for _cls, _extra in (("FcvtnX", {}), ("Fcvtn2X", {"hi": True})):
    twoRegNarrowInstX("fcvtn", _cls, "SimdCvtOp", ("uint16_t", "uint32_t"),
                      fcvtnCode, **_extra)

# FCVTNS
fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN")
_fpToIntInstX("fcvtns", "Fcvtns", fcvtnsCode)

# FCVTNU
fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN")
_fpToIntInstX("fcvtnu", "Fcvtnu", fcvtnuCode)

# FCVTPS
fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF")
_fpToIntInstX("fcvtps", "Fcvtps", fcvtpsCode)

# FCVTPU
fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF")
_fpToIntInstX("fcvtpu", "Fcvtpu", fcvtpuCode)

# FCVTXN, FCVTXN2
fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>("
                     "srcElem1, FPRounding_ODD, fpscr)")
for _cls, _extra in (("FcvtxnX", {}),
                     ("Fcvtxn2X", {"hi": True}),
                     ("FcvtxnScX", {"scalar": True})):
    twoRegNarrowInstX("fcvtxn", _cls, "SimdCvtOp", smallFloatTypes,
                      fcvtxnCode, **_extra)

# FCVTZS (fixed-point)
fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO")
_fpToIntInstX("fcvtzs", "FcvtzsFixed", fcvtzsCode, hasImm=True)

# FCVTZS (integer)
fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO")
_fpToIntInstX("fcvtzs", "FcvtzsInt", fcvtzsIntCode)

# FCVTZU (fixed-point)
fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO")
_fpToIntInstX("fcvtzu", "FcvtzuFixed", fcvtzuCode, hasImm=True)

# FCVTZU (integer)
fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO")
_fpToIntInstX("fcvtzu", "FcvtzuInt", fcvtzuIntCode)
1331 # FDIV
1332 fdivCode = fpBinOp % "Div"
1333 threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2,
1334 fdivCode)
1335 threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4,
1336 fdivCode)
# Floating-point maximum family. fmaxCode ("Max") and fmaxnmCode ("MaxNum")
# are both instantiations of fpBinOp and are reused by the plain, pairwise
# (pairwise=True) and scalar-pairwise registrations below.
1337 # FMAX
1338 fmaxCode = fpBinOp % "Max"
1339 threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2,
1340 fmaxCode)
1341 threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4,
1342 fmaxCode)
1343 # FMAXNM
1344 fmaxnmCode = fpBinOp % "MaxNum"
1345 threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes,
1346 2, fmaxnmCode)
1347 threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4,
1348 fmaxnmCode)
1349 # FMAXNMP (scalar)
# The scalar pairwise forms are registered with an explicit single-type
# tuple: ("uint32_t",) for the Sc*DX variant, ("uint64_t",) for Sc*QX.
1350 twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp",
1351 ("uint32_t",), 2, fmaxnmCode)
1352 twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp",
1353 ("uint64_t",), 4, fmaxnmCode)
1354 # FMAXNMP (vector)
1355 threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp",
1356 smallFloatTypes, 2, fmaxnmCode, pairwise=True)
1357 threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4,
1358 fmaxnmCode, pairwise=True)
1359 # FMAXNMV
1360 # Note: SimdFloatCmpOp can be a bit optimistic here
# fpAcrossOp is the across-lanes template: the fplib call combines destElem
# with srcElem1 (there is no srcElem2). It is defined here and reused by the
# FMAXV, FMINNMV and FMINV sections.
1361 fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)"
1362 fmaxnmAcrossCode = fpAcrossOp % "MaxNum"
1363 twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",),
1364 4, fmaxnmAcrossCode)
1365 # FMAXP (scalar)
1366 twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp",
1367 ("uint32_t",), 2, fmaxCode)
1368 twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp",
1369 ("uint64_t",), 4, fmaxCode)
1370 # FMAXP (vector)
1371 threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes,
1372 2, fmaxCode, pairwise=True)
1373 threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4,
1374 fmaxCode, pairwise=True)
1375 # FMAXV
1376 # Note: SimdFloatCmpOp can be a bit optimistic here
1377 fmaxAcrossCode = fpAcrossOp % "Max"
1378 twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
1379 fmaxAcrossCode)
# Floating-point minimum family; mirrors the FMAX sections with the "Min" and
# "MinNum" instantiations of fpBinOp. The across-lanes variants reuse
# fpAcrossOp, which is assigned earlier in the file (FMAXNMV section).
1380 # FMIN
1381 fminCode = fpBinOp % "Min"
1382 threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2,
1383 fminCode)
1384 threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4,
1385 fminCode)
1386 # FMINNM
1387 fminnmCode = fpBinOp % "MinNum"
1388 threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes,
1389 2, fminnmCode)
1390 threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4,
1391 fminnmCode)
1392 # FMINNMP (scalar)
1393 twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp",
1394 ("uint32_t",), 2, fminnmCode)
1395 twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp",
1396 ("uint64_t",), 4, fminnmCode)
1397 # FMINNMP (vector)
1398 threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp",
1399 smallFloatTypes, 2, fminnmCode, pairwise=True)
1400 threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4,
1401 fminnmCode, pairwise=True)
1402 # FMINNMV
1403 # Note: SimdFloatCmpOp can be a bit optimistic here
1404 fminnmAcrossCode = fpAcrossOp % "MinNum"
1405 twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",),
1406 4, fminnmAcrossCode)
1407 # FMINP (scalar)
1408 twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp",
1409 ("uint32_t",), 2, fminCode)
1410 twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp",
1411 ("uint64_t",), 4, fminCode)
1412 # FMINP (vector)
1413 threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes,
1414 2, fminCode, pairwise=True)
1415 threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4,
1416 fminCode, pairwise=True)
1417 # FMINV
1418 # Note: SimdFloatCmpOp can be a bit optimistic here
1419 fminAcrossCode = fpAcrossOp % "Min"
1420 twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4,
1421 fminAcrossCode)
# FMLA / FMLS. Both code strings call fplibMulAdd with destElem as the first
# operand; fmlsCode additionally wraps srcElem1 in fplibNeg before the call.
# NOTE(review): the positional True after the code string presumably tells the
# helper that destElem is also an input (accumulator) -- the helper signature
# is outside this chunk; confirm.
1422 # FMLA (by element)
1423 fmlaCode = fpOp % ("fplibMulAdd<Element>("
1424 "destElem, srcElem1, srcElem2, fpscr)")
1425 threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp",
1426 smallFloatTypes, 2, fmlaCode, True, byElem=True)
1427 threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes,
1428 4, fmlaCode, True, byElem=True)
1429 threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes,
1430 4, fmlaCode, True, byElem=True, scalar=True)
1431 # FMLA (vector)
1432 threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes,
1433 2, fmlaCode, True)
1434 threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4,
1435 fmlaCode, True)
1436 # FMLS (by element)
1437 fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem,"
1438 " fplibNeg<Element>(srcElem1), srcElem2, fpscr)")
1439 threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp",
1440 smallFloatTypes, 2, fmlsCode, True, byElem=True)
1441 threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes,
1442 4, fmlsCode, True, byElem=True)
1443 threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes,
1444 4, fmlsCode, True, byElem=True, scalar=True)
1445 # FMLS (vector)
1446 threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes,
1447 2, fmlsCode, True)
1448 threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4,
1449 fmlsCode, True)
# FMOV (immediate), FMUL and FMULX. fmovCode simply copies the immediate into
# destElem; fmulCode / fmulxCode are the "Mul" / "MulX" instantiations of
# fpBinOp and are shared by the vector, by-element and scalar registrations.
1450 # FMOV
1451 fmovCode = 'destElem = imm;'
1452 oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2,
1453 fmovCode)
1454 oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode)
1455 # FMUL (by element)
1456 fmulCode = fpBinOp % "Mul"
1457 threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp",
1458 smallFloatTypes, 2, fmulCode, byElem=True)
1459 threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4,
1460 fmulCode, byElem=True)
1461 threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4,
1462 fmulCode, byElem=True, scalar=True)
1463 # FMUL (vector)
1464 threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2,
1465 fmulCode)
1466 threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4,
1467 fmulCode)
1468 # FMULX
1469 fmulxCode = fpBinOp % "MulX"
1470 threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes,
1471 2, fmulxCode)
1472 threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4,
1473 fmulxCode)
1474 threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4,
1475 fmulxCode, scalar=True)
1476 # FMULX (by element)
1477 threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp",
1478 smallFloatTypes, 2, fmulxCode, byElem=True)
1479 threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes,
1480 4, fmulxCode, byElem=True)
1481 threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes,
1482 4, fmulxCode, byElem=True, scalar=True)
1483 # FNEG
1484 fnegCode = fpOp % "fplibNeg<Element>(srcElem1)"
1485 twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2,
1486 fnegCode)
1487 twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4,
1488 fnegCode)
# Reciprocal estimate / step / exponent. frecpeCode and frecpxCode are unary
# fplib calls on srcElem1; frecpsCode is the "RecipStepFused" instantiation of
# the binary fpBinOp template. FRECPX is only registered in scalar form.
1489 # FRECPE
1490 frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)"
1491 twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp",
1492 smallFloatTypes, 2, frecpeCode)
1493 twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4,
1494 frecpeCode)
1495 twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes,
1496 4, frecpeCode, scalar=True)
1497 # FRECPS
1498 frecpsCode = fpBinOp % "RecipStepFused"
1499 threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp",
1500 smallFloatTypes, 2, frecpsCode)
1501 threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes,
1502 4, frecpsCode)
1503 threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes,
1504 4, frecpsCode, scalar=True)
1505 # FRECPX
1506 frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)"
1507 twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4,
1508 frecpxCode, scalar=True)
# FRINT* family. frintCode is a template around fplibRoundInt with two "%s"
# slots: the rounding mode and a boolean literal. Each variant below fills the
# slots differently; only FRINTX passes "true" for the second slot, and
# FRINTI / FRINTX take the rounding mode from FPCRRounding(fpscr).
# NOTE(review): the boolean presumably selects "exact" (inexact-signalling)
# behaviour in fplibRoundInt -- its declaration is not visible here; confirm.
1509 # FRINTA
1510 frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)"
1511 frintaCode = frintCode % ("FPRounding_TIEAWAY", "false")
1512 twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2,
1513 frintaCode)
1514 twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4,
1515 frintaCode)
1516 # FRINTI
1517 frintiCode = frintCode % ("FPCRRounding(fpscr)", "false")
1518 twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2,
1519 frintiCode)
1520 twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4,
1521 frintiCode)
1522 # FRINTM
1523 frintmCode = frintCode % ("FPRounding_NEGINF", "false")
1524 twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2,
1525 frintmCode)
1526 twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4,
1527 frintmCode)
1528 # FRINTN
1529 frintnCode = frintCode % ("FPRounding_TIEEVEN", "false")
1530 twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2,
1531 frintnCode)
1532 twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4,
1533 frintnCode)
1534 # FRINTP
1535 frintpCode = frintCode % ("FPRounding_POSINF", "false")
1536 twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2,
1537 frintpCode)
1538 twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4,
1539 frintpCode)
1540 # FRINTX
1541 frintxCode = frintCode % ("FPCRRounding(fpscr)", "true")
1542 twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2,
1543 frintxCode)
1544 twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4,
1545 frintxCode)
1546 # FRINTZ
1547 frintzCode = frintCode % ("FPRounding_ZERO", "false")
1548 twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2,
1549 frintzCode)
1550 twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4,
1551 frintzCode)
# Square-root related instructions and FSUB. frsqrteCode and fsqrtCode are
# unary fplib calls on srcElem1; frsqrtsCode ("RSqrtStepFused") and fsubCode
# ("Sub") are instantiations of the binary fpBinOp template.
1552 # FRSQRTE
1553 frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)"
1554 twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp",
1555 smallFloatTypes, 2, frsqrteCode)
1556 twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4,
1557 frsqrteCode)
1558 twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4,
1559 frsqrteCode, scalar=True)
1560 # FRSQRTS
1561 frsqrtsCode = fpBinOp % "RSqrtStepFused"
1562 threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp",
1563 smallFloatTypes, 2, frsqrtsCode)
1564 threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes,
1565 4, frsqrtsCode)
1566 threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes,
1567 4, frsqrtsCode, scalar=True)
1568 # FSQRT
1569 fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)"
1570 twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2,
1571 fsqrtCode)
1572 twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4,
1573 fsqrtCode)
1574 # FSUB
1575 fsubCode = fpBinOp % "Sub"
1576 threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2,
1577 fsubCode)
1578 threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4,
1579 fsubCode)
# INS registrations. No code string is passed here: the element-insert
# behaviour comes entirely from the insFrom*InstX helpers (defined outside
# this chunk). The general-register form is registered twice, with 'W' for
# smallUnsignedTypes and 'X' for unsignedTypes.
1580 # INS (element)
1581 insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4)
1582 # INS (general register)
1583 insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4,
1584 'W')
1585 insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X')
# Integer multiply-accumulate / multiply-subtract. Both code strings update
# destElem in place (+= / -=) with the product srcElem1 * srcElem2. The
# by-element forms are registered for 16- and 32-bit unsigned elements only,
# the vector forms for smallUnsignedTypes.
1586 # MLA (by element)
1587 mlaCode = "destElem += srcElem1 * srcElem2;"
1588 threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp",
1589 ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True)
1590 threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp",
1591 ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True)
1592 # MLA (vector)
1593 threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2,
1594 mlaCode, True)
1595 threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4,
1596 mlaCode, True)
1597 # MLS (by element)
1598 mlsCode = "destElem -= srcElem1 * srcElem2;"
1599 threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp",
1600 ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True)
1601 threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp",
1602 ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True)
1603 # MLS (vector)
1604 threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2,
1605 mlsCode, True)
1606 threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4,
1607 mlsCode, True)
# The MOV forms listed first are aliases and therefore get no registration of
# their own. MOVI copies the immediate into destElem and is registered for
# ("uint64_t",) only; MUL stores the product srcElem1 * srcElem2.
1608 # MOV (element) -> alias to INS (element)
1609 # MOV (from general) -> alias to INS (general register)
1610 # MOV (scalar) -> alias to DUP (element)
1611 # MOV (to general) -> alias to UMOV
1612 # MOV (vector) -> alias to ORR (register)
1613 # MOVI
1614 movImmCode = "destElem = imm;"
1615 oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2,
1616 movImmCode)
1617 oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4,
1618 movImmCode)
1619 # MUL (by element)
1620 mulCode = "destElem = srcElem1 * srcElem2;"
1621 threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp",
1622 ("uint16_t", "uint32_t"), 2, mulCode, byElem=True)
1623 threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp",
1624 ("uint16_t", "uint32_t"), 4, mulCode, byElem=True)
1625 # MUL (vector)
1626 threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2,
1627 mulCode)
1628 threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4,
1629 mulCode)
# Bitwise / negate group. The purely bitwise operations (MVN, MVNI, ORN, ORR)
# are registered for ("uint64_t",) only; NEG is registered for signedTypes.
# The ORR (immediate) code uses |= on destElem and its registrations pass an
# extra positional True after the code string.
1630 # MVN
1631 mvnCode = "destElem = ~srcElem1;"
1632 twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode)
1633 twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode)
1634 # MVNI
1635 mvniCode = "destElem = ~imm;"
1636 oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode)
1637 oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode)
1638 # NEG
1639 negCode = "destElem = -srcElem1;"
1640 twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode)
1641 twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode)
1642 # NOT -> alias to MVN
1643 # ORN
1644 ornCode = "destElem = srcElem1 | ~srcElem2;"
1645 threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode)
1646 threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode)
1647 # ORR (immediate)
1648 orrImmCode = "destElem |= imm;"
1649 oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2,
1650 orrImmCode, True)
1651 oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4,
1652 orrImmCode, True)
1653 # ORR (register)
1654 orrCode = "destElem = srcElem1 | srcElem2;"
1655 threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode)
1656 threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode)
# Polynomial multiply. Both code strings clear destElem and then, for every
# set bit j of srcElem2, XOR srcElem1 shifted left by j into destElem. The
# long variant (pmullCode) casts srcElem1 to BigElement before shifting so the
# shifted bits are kept in the wider type. Only ("uint8_t",) is registered.
1657 # PMUL
1658 pmulCode = '''
1659 destElem = 0;
1660 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
1661 if (bits(srcElem2, j))
1662 destElem ^= srcElem1 << j;
1663 }
1664 '''
1665 threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2,
1666 pmulCode)
1667 threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4,
1668 pmulCode)
1669 # PMULL, PMULL2
1670 # Note: 64-bit PMULL is not available (Crypto. Extension)
1671 pmullCode = '''
1672 destElem = 0;
1673 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
1674 if (bits(srcElem2, j))
1675 destElem ^= (BigElement)srcElem1 << j;
1676 }
1677 '''
1678 threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode)
1679 threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",),
1680 pmullCode, hi=True)
# RADDHN adds the two sources in BigElement width, adds a rounding constant
# of 1 << (element bits - 1), then shifts right by the element width.
# RBIT rebuilds destElem one bit at a time: bit i of the source (taken from
# the low bit of a shifting temp) is placed at position (bits - 1 - i).
1681 # RADDHN, RADDHN2
1682 raddhnCode = '''
1683 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1684 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1685 (sizeof(Element) * 8);
1686 '''
1687 threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes,
1688 raddhnCode)
1689 threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes,
1690 raddhnCode, hi=True)
1691 # RBIT
1692 rbitCode = '''
1693 destElem = 0;
1694 Element temp = srcElem1;
1695 for (int i = 0; i < 8 * sizeof(Element); i++) {
1696 destElem = destElem | ((temp & 0x1) <<
1697 (8 * sizeof(Element) - 1 - i));
1698 temp >>= 1;
1699 }
1700 '''
1701 twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode)
1702 twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode)
# REV16 / REV32 / REV64. The three code strings differ only in the group size
# in bytes (1 << 1, 1 << 2, 1 << 3). Each copies srcElem1 to destElem and
# assigns j = i ^ (groupSize - 1), where groupSize is the number of elements
# per group. i and j are not declared in these snippets.
# NOTE(review): j is presumably the destination element index consumed by the
# twoEqualRegInstX template -- confirm against the helper.
1703 # REV16
1704 rev16Code = '''
1705 destElem = srcElem1;
1706 unsigned groupSize = ((1 << 1) / sizeof(Element));
1707 unsigned reverseMask = (groupSize - 1);
1708 j = i ^ reverseMask;
1709 '''
1710 twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2,
1711 rev16Code)
1712 twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4,
1713 rev16Code)
1714 # REV32
1715 rev32Code = '''
1716 destElem = srcElem1;
1717 unsigned groupSize = ((1 << 2) / sizeof(Element));
1718 unsigned reverseMask = (groupSize - 1);
1719 j = i ^ reverseMask;
1720 '''
1721 twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"),
1722 2, rev32Code)
1723 twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"),
1724 4, rev32Code)
1725 # REV64
1726 rev64Code = '''
1727 destElem = srcElem1;
1728 unsigned groupSize = ((1 << 3) / sizeof(Element));
1729 unsigned reverseMask = (groupSize - 1);
1730 j = i ^ reverseMask;
1731 '''
1732 twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2,
1733 rev64Code)
1734 twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4,
1735 rev64Code)
# RSHRN: rounding shift right. For a non-zero imm the rounding bit is bit
# (imm - 1) of the source, and the shift is split into (imm - 1) followed by
# 1. The zero-result guard uses '>' (not '>='), so imm equal to the source
# width still goes through the rounding path.
# RSUBHN: same shape as the rounding add-narrow code but subtracts srcElem2.
# It is registered with smallTypes.
1736 # RSHRN, RSHRN2
1737 rshrnCode = '''
1738 if (imm > sizeof(srcElem1) * 8) {
1739 destElem = 0;
1740 } else if (imm) {
1741 Element rBit = bits(srcElem1, imm - 1);
1742 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
1743 } else {
1744 destElem = srcElem1;
1745 }
1746 '''
1747 twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes,
1748 rshrnCode, hasImm=True)
1749 twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes,
1750 rshrnCode, hasImm=True, hi=True)
1751 # RSUBHN, RSUBHN2
1752 rsubhnCode = '''
1753 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1754 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1755 (sizeof(Element) * 8);
1756 '''
1757 threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes,
1758 rsubhnCode)
1759 threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes,
1760 rsubhnCode, hi=True)
# Signed absolute-difference group. All four code strings compute
# |srcElem1 - srcElem2| by subtracting the smaller operand from the larger;
# the *A* variants accumulate into destElem (+=) and the long variants widen
# both operands to BigElement before subtracting.
# NOTE(review): the SABDL registrations pass "SimdAddAccOp" and a positional
# True even though abdlCode assigns (=) rather than accumulates -- this looks
# copied from SABAL; confirm whether it is intentional.
1761 # SABA
1762 abaCode = '''
1763 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1764 (srcElem2 - srcElem1);
1765 '''
1766 threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2,
1767 abaCode, True)
1768 threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4,
1769 abaCode, True)
1770 # SABAL, SABAL2
1771 abalCode = '''
1772 destElem += (srcElem1 > srcElem2) ?
1773 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1774 ((BigElement)srcElem2 - (BigElement)srcElem1);
1775 '''
1776 threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes,
1777 abalCode, True)
1778 threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes,
1779 abalCode, True, hi=True)
1780 # SABD
1781 abdCode = '''
1782 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1783 (srcElem2 - srcElem1);
1784 '''
1785 threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2,
1786 abdCode)
1787 threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4,
1788 abdCode)
1789 # SABDL, SABDL2
1790 abdlCode = '''
1791 destElem = (srcElem1 > srcElem2) ?
1792 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1793 ((BigElement)srcElem2 - (BigElement)srcElem1);
1794 '''
1795 threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes,
1796 abdlCode, True)
1797 threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes,
1798 abdlCode, True, hi=True)
# Signed widening adds. adalpCode accumulates the widened sum into destElem;
# addlwCode assigns the widened sum and is shared by SADDL, SADDLP and SADDW;
# addAcrossLongCode accumulates one widened source element per evaluation for
# the across-lanes SADDLV forms. The int32_t SADDLV variant is registered
# separately with doubleDest=True.
1799 # SADALP
1800 adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;"
1801 twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2,
1802 adalpCode, True)
1803 twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4,
1804 adalpCode, True)
1805 # SADDL, SADDL2
1806 addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;"
1807 threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes,
1808 addlwCode)
1809 threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes,
1810 addlwCode, hi=True)
1811 # SADDLP
1812 twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2,
1813 addlwCode)
1814 twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4,
1815 addlwCode)
1816 # SADDLV
1817 # Note: SimdAddOp can be a bit optimistic here
1818 addAcrossLongCode = "destElem += (BigElement)srcElem1;"
1819 twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"),
1820 2, addAcrossLongCode, long=True)
1821 twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"),
1822 4, addAcrossLongCode, long=True)
1823 twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4,
1824 addAcrossLongCode, doubleDest=True, long=True)
1825 # SADDW, SADDW2
1826 threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes,
1827 addlwCode)
1828 threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes,
1829 addlwCode, hi=True)
# SCVTF. Both templates call fplibFixedToFP on srcElem1 cast to a signed
# integer type. The "%d" left in each template is filled per registration
# with 32 or 64 to form the int32_t / int64_t cast. The fixed-point form
# passes imm as the second argument, the integer form passes 0.
1830 # SCVTF (fixed-point)
1831 scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm,"
1832 " false, FPCRRounding(fpscr), fpscr)")
1833 twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
1834 scvtfFixedCode % 32, hasImm=True)
1835 twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4,
1836 scvtfFixedCode % 32, hasImm=True)
1837 twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4,
1838 scvtfFixedCode % 64, hasImm=True)
1839 twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes,
1840 4, scvtfFixedCode % 32, hasImm=True, scalar=True)
1841 twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4,
1842 scvtfFixedCode % 64, hasImm=True, scalar=True)
1843 # SCVTF (integer)
1844 scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0,"
1845 " false, FPCRRounding(fpscr), fpscr)")
1846 twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
1847 scvtfIntCode % 32)
1848 twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4,
1849 scvtfIntCode % 32)
1850 twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4,
1851 scvtfIntCode % 64)
1852 twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4,
1853 scvtfIntCode % 32, scalar=True)
1854 twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4,
1855 scvtfIntCode % 64, scalar=True)
# SHADD: halving add. The low bits of both sources are summed separately to
# form carryBit; each source then has its low bit masked off and is halved
# before the two halves and the carry are added.
# SHL: an immediate at or above the element width is handled by shifting in
# two steps (width - 1, then 1) instead of by imm directly.
1856 # SHADD
1857 haddCode = '''
1858 Element carryBit =
1859 (((unsigned)srcElem1 & 0x1) +
1860 ((unsigned)srcElem2 & 0x1)) >> 1;
1861 // Use division instead of a shift to ensure the sign extension works
1862 // right. The compiler will figure out if it can be a shift. Mask the
1863 // inputs so they get truncated correctly.
1864 destElem = (((srcElem1 & ~(Element)1) / 2) +
1865 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1866 '''
1867 threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2,
1868 haddCode)
1869 threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4,
1870 haddCode)
1871 # SHL
1872 shlCode = '''
1873 if (imm >= sizeof(Element) * 8)
1874 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
1875 else
1876 destElem = srcElem1 << imm;
1877 '''
1878 twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode,
1879 hasImm=True)
1880 twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode,
1881 hasImm=True)
# SHLL widens srcElem1 to BigElement and shifts it left by the element width.
# SHRN yields 0 when imm is at least the source width, otherwise a plain
# right shift by imm.
# SHSUB: halving subtract. borrowBit is derived from the difference of the
# two low bits, then subtracted from the difference of the halved, low-bit
# masked sources.
# Note that the Shll2X registration passes the mnemonic "shll" (with hi=True).
1882 # SHLL, SHLL2
1883 shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);"
1884 twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode)
1885 twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode,
1886 hi=True)
1887 # SHRN, SHRN2
1888 shrnCode = '''
1889 if (imm >= sizeof(srcElem1) * 8) {
1890 destElem = 0;
1891 } else {
1892 destElem = srcElem1 >> imm;
1893 }
1894 '''
1895 twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes,
1896 shrnCode, hasImm=True)
1897 twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes,
1898 shrnCode, hasImm=True, hi=True)
1899 # SHSUB
1900 hsubCode = '''
1901 Element borrowBit =
1902 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1903 // Use division instead of a shift to ensure the sign extension works
1904 // right. The compiler will figure out if it can be a shift. Mask the
1905 // inputs so they get truncated correctly.
1906 destElem = (((srcElem1 & ~(Element)1) / 2) -
1907 ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
1908 '''
1909 threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2,
1910 hsubCode)
1911 threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4,
1912 hsubCode)
# SLI: shift left and insert. For imm below the element width the result is
# the shifted source OR-ed with the low imm bits of the existing destElem
# (destElem & mask(imm)); otherwise destElem is left as it was (the
# self-assignment is a no-op). The registrations pass a positional True after
# the code string, consistent with the code reading destElem.
1913 # SLI
1914 sliCode = '''
1915 if (imm >= sizeof(Element) * 8)
1916 destElem = destElem;
1917 else
1918 destElem = (srcElem1 << imm) | (destElem & mask(imm));
1919 '''
1920 twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode,
1921 True, hasImm=True)
1922 twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode,
1923 True, hasImm=True)
# Signed integer max / min. maxCode and minCode pick between the two sources
# and are shared by the plain and pairwise (pairwise=True) registrations.
# The across-lanes variants compare each source element against the running
# destElem and take it unconditionally when i == 0. The D-register across
# forms are registered for int8_t / int16_t only.
1924 # SMAX
1925 maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;"
1926 threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2,
1927 maxCode)
1928 threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4,
1929 maxCode)
1930 # SMAXP
1931 threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2,
1932 maxCode, pairwise=True)
1933 threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4,
1934 maxCode, pairwise=True)
1935 # SMAXV
1936 maxAcrossCode = '''
1937 if (i == 0 || srcElem1 > destElem)
1938 destElem = srcElem1;
1939 '''
1940 twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"),
1941 2, maxAcrossCode)
1942 twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4,
1943 maxAcrossCode)
1944 # SMIN
1945 minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;"
1946 threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2,
1947 minCode)
1948 threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4,
1949 minCode)
1950 # SMINP
1951 threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2,
1952 minCode, pairwise=True)
1953 threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4,
1954 minCode, pairwise=True)
1955 # SMINV
1956 minAcrossCode = '''
1957 if (i == 0 || srcElem1 < destElem)
1958 destElem = srcElem1;
1959 '''
1960 twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"),
1961 2, minAcrossCode)
1962 twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4,
1963 minAcrossCode)
1964
# NOTE(review): split('exec') is defined outside this chunk; presumably it
# asks the ISA parser to start a new generated 'exec' output file here --
# confirm against the parser before moving it.
1965 split('exec')
1966

# Signed widening multiplies. mlalCode / mlslCode accumulate (+= / -=) the
# BigElement-width product into destElem; mullCode assigns it. SMOV passes no
# code string, only a register-width letter ('W' or 'X') and a trailing True.
# NOTE(review): SMLAL (by element) is registered for ("int16_t", "int32_t")
# while the SMLSL and SMULL by-element forms use smallSignedTypes; confirm
# whether the broader type list is intended for by-element forms.
1967 # SMLAL, SMLAL2 (by element)
1968 mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;"
1969 threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp",
1970 ("int16_t", "int32_t"), mlalCode, True, byElem=True)
1971 threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp",
1972 ("int16_t", "int32_t"), mlalCode, True, byElem=True,
1973 hi=True)
1974 # SMLAL, SMLAL2 (vector)
1975 threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes,
1976 mlalCode, True)
1977 threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes,
1978 mlalCode, True, hi=True)
1979 # SMLSL, SMLSL2 (by element)
1980 mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;"
1981 threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes,
1982 mlslCode, True, byElem=True)
1983 threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp",
1984 smallSignedTypes, mlslCode, True, byElem=True, hi=True)
1985 # SMLSL, SMLSL2 (vector)
1986 threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes,
1987 mlslCode, True)
1988 threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes,
1989 mlslCode, True, hi=True)
1990 # SMOV
1991 insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4,
1992 'W', True)
1993 insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X',
1994 True)
1995 # SMULL, SMULL2 (by element)
1996 mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;"
1997 threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes,
1998 mullCode, byElem=True)
1999 threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes,
2000 mullCode, byElem=True, hi=True)
2001 # SMULL, SMULL2 (vector)
2002 threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes,
2003 mullCode)
2004 threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes,
2005 mullCode, hi=True)
2006 # SQABS
2007 sqabsCode = '''
2008 FPSCR fpscr = (FPSCR) FpscrQc;
2009 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2009 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2010 fpscr.qc = 1;
2011 destElem = ~srcElem1;
2012 } else if (srcElem1 < 0) {
2013 destElem = -srcElem1;
2014 } else {
2015 destElem = srcElem1;
2016 }
2017 FpscrQc = fpscr;
2018 '''
2019 twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
2020 sqabsCode)
2021 twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
2022 sqabsCode)
2023 twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
2024 sqabsCode, scalar=True)
2025 # SQADD
2026 sqaddCode = '''
2027 destElem = srcElem1 + srcElem2;
2028 FPSCR fpscr = (FPSCR) FpscrQc;
2029 bool negDest = (destElem < 0);
2030 bool negSrc1 = (srcElem1 < 0);
2031 bool negSrc2 = (srcElem2 < 0);
2032 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2010 fpscr.qc = 1;
2011 destElem = ~srcElem1;
2012 } else if (srcElem1 < 0) {
2013 destElem = -srcElem1;
2014 } else {
2015 destElem = srcElem1;
2016 }
2017 FpscrQc = fpscr;
2018 '''
2019 twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2,
2020 sqabsCode)
2021 twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4,
2022 sqabsCode)
2023 twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4,
2024 sqabsCode, scalar=True)
2025 # SQADD
2026 sqaddCode = '''
2027 destElem = srcElem1 + srcElem2;
2028 FPSCR fpscr = (FPSCR) FpscrQc;
2029 bool negDest = (destElem < 0);
2030 bool negSrc1 = (srcElem1 < 0);
2031 bool negSrc2 = (srcElem2 < 0);
2032 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2033 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2033 destElem = std::numeric_limits<Element>::min();
2034 if (negDest)
2035 destElem -= 1;
2036 fpscr.qc = 1;
2037 }
2038 FpscrQc = fpscr;
2039 '''
2040 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
2041 sqaddCode)
2042 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
2043 sqaddCode)
2044 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
2045 sqaddCode, scalar=True)
2046 # SQDMLAL, SQDMLAL2 (by element)
2047 qdmlalCode = '''
2048 FPSCR fpscr = (FPSCR) FpscrQc;
2049 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2034 if (negDest)
2035 destElem -= 1;
2036 fpscr.qc = 1;
2037 }
2038 FpscrQc = fpscr;
2039 '''
2040 threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2,
2041 sqaddCode)
2042 threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4,
2043 sqaddCode)
2044 threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4,
2045 sqaddCode, scalar=True)
2046 # SQDMLAL, SQDMLAL2 (by element)
2047 qdmlalCode = '''
2048 FPSCR fpscr = (FPSCR) FpscrQc;
2049 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2050 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2050 Element maxNeg = std::numeric_limits<Element>::min();
2051 Element halfNeg = maxNeg / 2;
2052 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 fpscr.qc = 1;
2057 }
2058 bool negPreDest = ltz(destElem);
2059 destElem += midElem;
2060 bool negDest = ltz(destElem);
2061 bool negMid = ltz(midElem);
2062 if (negPreDest == negMid && negMid != negDest) {
2063 destElem = mask(sizeof(BigElement) * 8 - 1);
2064 if (negPreDest)
2065 destElem = ~destElem;
2066 fpscr.qc = 1;
2067 }
2068 FpscrQc = fpscr;
2069 '''
2070 threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
2071 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
2072 threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
2073 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2074 hi=True)
2075 threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
2076 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2077 scalar=True)
2078 # SQDMLAL, SQDMLAL2 (vector)
2079 threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
2080 ("int16_t", "int32_t"), qdmlalCode, True)
2081 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
2082 ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
2083 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
2084 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
2085 # SQDMLSL, SQDMLSL2 (by element)
2086 qdmlslCode = '''
2087 FPSCR fpscr = (FPSCR) FpscrQc;
2088 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2051 Element halfNeg = maxNeg / 2;
2052 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 fpscr.qc = 1;
2057 }
2058 bool negPreDest = ltz(destElem);
2059 destElem += midElem;
2060 bool negDest = ltz(destElem);
2061 bool negMid = ltz(midElem);
2062 if (negPreDest == negMid && negMid != negDest) {
2063 destElem = mask(sizeof(BigElement) * 8 - 1);
2064 if (negPreDest)
2065 destElem = ~destElem;
2066 fpscr.qc = 1;
2067 }
2068 FpscrQc = fpscr;
2069 '''
2070 threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp",
2071 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True)
2072 threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp",
2073 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2074 hi=True)
2075 threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp",
2076 ("int16_t", "int32_t"), qdmlalCode, True, byElem=True,
2077 scalar=True)
2078 # SQDMLAL, SQDMLAL2 (vector)
2079 threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp",
2080 ("int16_t", "int32_t"), qdmlalCode, True)
2081 threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp",
2082 ("int16_t", "int32_t"), qdmlalCode, True, hi=True)
2083 threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp",
2084 ("int16_t", "int32_t"), qdmlalCode, True, scalar=True)
2085 # SQDMLSL, SQDMLSL2 (by element)
2086 qdmlslCode = '''
2087 FPSCR fpscr = (FPSCR) FpscrQc;
2088 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2089 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2089 Element maxNeg = std::numeric_limits<Element>::min();
2090 Element halfNeg = maxNeg / 2;
2091 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2092 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2093 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2094 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2095 fpscr.qc = 1;
2096 }
2097 bool negPreDest = ltz(destElem);
2098 destElem -= midElem;
2099 bool negDest = ltz(destElem);
2100 bool posMid = ltz((BigElement)-midElem);
2101 if (negPreDest == posMid && posMid != negDest) {
2102 destElem = mask(sizeof(BigElement) * 8 - 1);
2103 if (negPreDest)
2104 destElem = ~destElem;
2105 fpscr.qc = 1;
2106 }
2107 FpscrQc = fpscr;
2108 '''
2109 threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
2110 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
2111 threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
2112 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2113 hi=True)
2114 threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
2115 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2116 scalar=True)
2117 # SQDMLSL, SQDMLSL2 (vector)
2118 threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
2119 ("int16_t", "int32_t"), qdmlslCode, True)
2120 threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
2121 ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
2122 threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
2123 ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
2124 # SQDMULH (by element)
2125 sqdmulhCode = '''
2126 FPSCR fpscr = (FPSCR) FpscrQc;
2127 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2128 (sizeof(Element) * 8);
2129 if (srcElem1 == srcElem2 &&
2130 srcElem1 == (Element)((Element)1 <<
2131 (sizeof(Element) * 8 - 1))) {
2132 destElem = ~srcElem1;
2133 fpscr.qc = 1;
2134 }
2135 FpscrQc = fpscr;
2136 '''
2137 threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
2138 ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
2139 threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
2140 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
2141 threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp",
2142 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True,
2143 scalar=True)
2144 # SQDMULH (vector)
2145 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp",
2146 ("int16_t", "int32_t"), 2, sqdmulhCode)
2147 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp",
2148 ("int16_t", "int32_t"), 4, sqdmulhCode)
2149 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp",
2150 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True)
2151 # SQDMULL, SQDMULL2 (by element)
2152 qdmullCode = '''
2153 FPSCR fpscr = (FPSCR) FpscrQc;
2154 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2155 if (srcElem1 == srcElem2 &&
2156 srcElem1 == (Element)((Element)1 <<
2157 (Element)(sizeof(Element) * 8 - 1))) {
2158 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2159 fpscr.qc = 1;
2160 }
2161 FpscrQc = fpscr;
2162 '''
2163 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp",
2164 ("int16_t", "int32_t"), qdmullCode, True, byElem=True)
2165 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp",
2166 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2167 hi=True)
2168 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp",
2169 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2170 scalar=True)
2171 # SQDMULL, SQDMULL2 (vector)
2172 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp",
2173 ("int16_t", "int32_t"), qdmullCode, True)
2174 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
2175 ("int16_t", "int32_t"), qdmullCode, True, hi=True)
2176 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
2177 ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
2178 # SQNEG
2179 sqnegCode = '''
2180 FPSCR fpscr = (FPSCR) FpscrQc;
2090 Element halfNeg = maxNeg / 2;
2091 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2092 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2093 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2094 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2095 fpscr.qc = 1;
2096 }
2097 bool negPreDest = ltz(destElem);
2098 destElem -= midElem;
2099 bool negDest = ltz(destElem);
2100 bool posMid = ltz((BigElement)-midElem);
2101 if (negPreDest == posMid && posMid != negDest) {
2102 destElem = mask(sizeof(BigElement) * 8 - 1);
2103 if (negPreDest)
2104 destElem = ~destElem;
2105 fpscr.qc = 1;
2106 }
2107 FpscrQc = fpscr;
2108 '''
2109 threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp",
2110 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True)
2111 threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp",
2112 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2113 hi=True)
2114 threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp",
2115 ("int16_t", "int32_t"), qdmlslCode, True, byElem=True,
2116 scalar=True)
2117 # SQDMLSL, SQDMLSL2 (vector)
2118 threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp",
2119 ("int16_t", "int32_t"), qdmlslCode, True)
2120 threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp",
2121 ("int16_t", "int32_t"), qdmlslCode, True, hi=True)
2122 threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp",
2123 ("int16_t", "int32_t"), qdmlslCode, True, scalar=True)
2124 # SQDMULH (by element)
2125 sqdmulhCode = '''
2126 FPSCR fpscr = (FPSCR) FpscrQc;
2127 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2128 (sizeof(Element) * 8);
2129 if (srcElem1 == srcElem2 &&
2130 srcElem1 == (Element)((Element)1 <<
2131 (sizeof(Element) * 8 - 1))) {
2132 destElem = ~srcElem1;
2133 fpscr.qc = 1;
2134 }
2135 FpscrQc = fpscr;
2136 '''
2137 threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp",
2138 ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True)
2139 threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp",
2140 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True)
2141 threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp",
2142 ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True,
2143 scalar=True)
2144 # SQDMULH (vector)
2145 threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp",
2146 ("int16_t", "int32_t"), 2, sqdmulhCode)
2147 threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp",
2148 ("int16_t", "int32_t"), 4, sqdmulhCode)
2149 threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp",
2150 ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True)
2151 # SQDMULL, SQDMULL2 (by element)
2152 qdmullCode = '''
2153 FPSCR fpscr = (FPSCR) FpscrQc;
2154 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2155 if (srcElem1 == srcElem2 &&
2156 srcElem1 == (Element)((Element)1 <<
2157 (Element)(sizeof(Element) * 8 - 1))) {
2158 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2159 fpscr.qc = 1;
2160 }
2161 FpscrQc = fpscr;
2162 '''
2163 threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp",
2164 ("int16_t", "int32_t"), qdmullCode, True, byElem=True)
2165 threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp",
2166 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2167 hi=True)
2168 threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp",
2169 ("int16_t", "int32_t"), qdmullCode, True, byElem=True,
2170 scalar=True)
2171 # SQDMULL, SQDMULL2 (vector)
2172 threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp",
2173 ("int16_t", "int32_t"), qdmullCode, True)
2174 threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp",
2175 ("int16_t", "int32_t"), qdmullCode, True, hi=True)
2176 threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp",
2177 ("int16_t", "int32_t"), qdmullCode, True, scalar=True)
2178 # SQNEG
2179 sqnegCode = '''
2180 FPSCR fpscr = (FPSCR) FpscrQc;
2181 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2181 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2182 fpscr.qc = 1;
2183 destElem = ~srcElem1;
2184 } else {
2185 destElem = -srcElem1;
2186 }
2187 FpscrQc = fpscr;
2188 '''
2189 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
2190 sqnegCode)
2191 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
2192 sqnegCode)
2193 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
2194 sqnegCode, scalar=True)
2195 # SQRDMULH (by element)
2196 sqrdmulhCode = '''
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2199 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2200 (sizeof(Element) * 8);
2182 fpscr.qc = 1;
2183 destElem = ~srcElem1;
2184 } else {
2185 destElem = -srcElem1;
2186 }
2187 FpscrQc = fpscr;
2188 '''
2189 twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2,
2190 sqnegCode)
2191 twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4,
2192 sqnegCode)
2193 twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
2194 sqnegCode, scalar=True)
2195 # SQRDMULH (by element)
2196 sqrdmulhCode = '''
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2199 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2200 (sizeof(Element) * 8);
2201 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2201 Element maxNeg = std::numeric_limits<Element>::min();
2202 Element halfNeg = maxNeg / 2;
2203 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2204 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2205 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2206 if (destElem < 0) {
2207 destElem = mask(sizeof(Element) * 8 - 1);
2208 } else {
2202 Element halfNeg = maxNeg / 2;
2203 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2204 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2205 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2206 if (destElem < 0) {
2207 destElem = mask(sizeof(Element) * 8 - 1);
2208 } else {
2209 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2209 destElem = std::numeric_limits<Element>::min();
2210 }
2211 fpscr.qc = 1;
2212 }
2213 FpscrQc = fpscr;
2214 '''
2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
2216 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",
2218 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True)
2219 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp",
2220 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True,
2221 scalar=True)
2222 # SQRDMULH (vector)
2223 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp",
2224 ("int16_t", "int32_t"), 2, sqrdmulhCode)
2225 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp",
2226 ("int16_t", "int32_t"), 4, sqrdmulhCode)
2227 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp",
2228 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True)
2229 # SQRSHL
2230 sqrshlCode = '''
2231 int16_t shiftAmt = (int8_t)srcElem2;
2232 FPSCR fpscr = (FPSCR) FpscrQc;
2233 if (shiftAmt < 0) {
2234 shiftAmt = -shiftAmt;
2235 Element rBit = 0;
2236 if (shiftAmt <= sizeof(Element) * 8)
2237 rBit = bits(srcElem1, shiftAmt - 1);
2238 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2239 rBit = 1;
2240 if (shiftAmt >= sizeof(Element) * 8) {
2241 shiftAmt = sizeof(Element) * 8 - 1;
2242 destElem = 0;
2243 } else {
2244 destElem = (srcElem1 >> shiftAmt);
2245 }
2246 // Make sure the right shift sign extended when it should.
2247 if (srcElem1 < 0 && destElem >= 0) {
2248 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2249 1 - shiftAmt));
2250 }
2251 destElem += rBit;
2252 } else if (shiftAmt > 0) {
2253 bool sat = false;
2254 if (shiftAmt >= sizeof(Element) * 8) {
2255 if (srcElem1 != 0)
2256 sat = true;
2257 else
2258 destElem = 0;
2259 } else {
2260 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2261 sizeof(Element) * 8 - 1 - shiftAmt) !=
2262 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2263 sat = true;
2264 } else {
2265 destElem = srcElem1 << shiftAmt;
2266 }
2267 }
2268 if (sat) {
2269 fpscr.qc = 1;
2270 destElem = mask(sizeof(Element) * 8 - 1);
2271 if (srcElem1 < 0)
2272 destElem = ~destElem;
2273 }
2274 } else {
2275 destElem = srcElem1;
2276 }
2277 FpscrQc = fpscr;
2278 '''
2279 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2,
2280 sqrshlCode)
2281 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4,
2282 sqrshlCode)
2283 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4,
2284 sqrshlCode, scalar=True)
2285 # SQRSHRN, SQRSHRN2
2286 sqrshrnCode = '''
2287 FPSCR fpscr = (FPSCR) FpscrQc;
2288 if (imm > sizeof(srcElem1) * 8) {
2289 if (srcElem1 != 0 && srcElem1 != -1)
2290 fpscr.qc = 1;
2291 destElem = 0;
2292 } else if (imm) {
2293 BigElement mid = (srcElem1 >> (imm - 1));
2294 uint64_t rBit = mid & 0x1;
2295 mid >>= 1;
2296 mid |= -(mid & ((BigElement)1 <<
2297 (sizeof(BigElement) * 8 - 1 - imm)));
2298 mid += rBit;
2299 if (mid != (Element)mid) {
2300 destElem = mask(sizeof(Element) * 8 - 1);
2301 if (srcElem1 < 0)
2302 destElem = ~destElem;
2303 fpscr.qc = 1;
2304 } else {
2305 destElem = mid;
2306 }
2307 } else {
2308 if (srcElem1 != (Element)srcElem1) {
2309 destElem = mask(sizeof(Element) * 8 - 1);
2310 if (srcElem1 < 0)
2311 destElem = ~destElem;
2312 fpscr.qc = 1;
2313 } else {
2314 destElem = srcElem1;
2315 }
2316 }
2317 FpscrQc = fpscr;
2318 '''
2319 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes,
2320 sqrshrnCode, hasImm=True)
2321 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes,
2322 sqrshrnCode, hasImm=True, hi=True)
2323 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes,
2324 sqrshrnCode, hasImm=True, scalar=True)
2325 # SQRSHRUN, SQRSHRUN2
2326 sqrshrunCode = '''
2327 FPSCR fpscr = (FPSCR) FpscrQc;
2328 if (imm > sizeof(srcElem1) * 8) {
2329 if (srcElem1 != 0)
2330 fpscr.qc = 1;
2331 destElem = 0;
2332 } else if (imm) {
2333 BigElement mid = (srcElem1 >> (imm - 1));
2334 uint64_t rBit = mid & 0x1;
2335 mid >>= 1;
2336 mid |= -(mid & ((BigElement)1 <<
2337 (sizeof(BigElement) * 8 - 1 - imm)));
2338 mid += rBit;
2339 if (bits(mid, sizeof(BigElement) * 8 - 1,
2340 sizeof(Element) * 8) != 0) {
2341 if (srcElem1 < 0) {
2342 destElem = 0;
2343 } else {
2344 destElem = mask(sizeof(Element) * 8);
2345 }
2346 fpscr.qc = 1;
2347 } else {
2348 destElem = mid;
2349 }
2350 } else {
2351 if (srcElem1 < 0) {
2352 fpscr.qc = 1;
2353 destElem = 0;
2354 } else {
2355 destElem = srcElem1;
2356 }
2357 }
2358 FpscrQc = fpscr;
2359 '''
2360 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes,
2361 sqrshrunCode, hasImm=True)
2362 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp",
2363 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
2364 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
2365 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
2366 # SQSHL (immediate)
2367 sqshlImmCode = '''
2368 FPSCR fpscr = (FPSCR) FpscrQc;
2369 if (imm >= sizeof(Element) * 8) {
2370 if (srcElem1 != 0) {
2210 }
2211 fpscr.qc = 1;
2212 }
2213 FpscrQc = fpscr;
2214 '''
2215 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp",
2216 ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True)
2217 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp",
2218 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True)
2219 threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp",
2220 ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True,
2221 scalar=True)
2222 # SQRDMULH (vector)
2223 threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp",
2224 ("int16_t", "int32_t"), 2, sqrdmulhCode)
2225 threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp",
2226 ("int16_t", "int32_t"), 4, sqrdmulhCode)
2227 threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp",
2228 ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True)
2229 # SQRSHL
2230 sqrshlCode = '''
2231 int16_t shiftAmt = (int8_t)srcElem2;
2232 FPSCR fpscr = (FPSCR) FpscrQc;
2233 if (shiftAmt < 0) {
2234 shiftAmt = -shiftAmt;
2235 Element rBit = 0;
2236 if (shiftAmt <= sizeof(Element) * 8)
2237 rBit = bits(srcElem1, shiftAmt - 1);
2238 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2239 rBit = 1;
2240 if (shiftAmt >= sizeof(Element) * 8) {
2241 shiftAmt = sizeof(Element) * 8 - 1;
2242 destElem = 0;
2243 } else {
2244 destElem = (srcElem1 >> shiftAmt);
2245 }
2246 // Make sure the right shift sign extended when it should.
2247 if (srcElem1 < 0 && destElem >= 0) {
2248 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2249 1 - shiftAmt));
2250 }
2251 destElem += rBit;
2252 } else if (shiftAmt > 0) {
2253 bool sat = false;
2254 if (shiftAmt >= sizeof(Element) * 8) {
2255 if (srcElem1 != 0)
2256 sat = true;
2257 else
2258 destElem = 0;
2259 } else {
2260 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2261 sizeof(Element) * 8 - 1 - shiftAmt) !=
2262 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2263 sat = true;
2264 } else {
2265 destElem = srcElem1 << shiftAmt;
2266 }
2267 }
2268 if (sat) {
2269 fpscr.qc = 1;
2270 destElem = mask(sizeof(Element) * 8 - 1);
2271 if (srcElem1 < 0)
2272 destElem = ~destElem;
2273 }
2274 } else {
2275 destElem = srcElem1;
2276 }
2277 FpscrQc = fpscr;
2278 '''
2279 threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2,
2280 sqrshlCode)
2281 threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4,
2282 sqrshlCode)
2283 threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4,
2284 sqrshlCode, scalar=True)
2285 # SQRSHRN, SQRSHRN2
2286 sqrshrnCode = '''
2287 FPSCR fpscr = (FPSCR) FpscrQc;
2288 if (imm > sizeof(srcElem1) * 8) {
2289 if (srcElem1 != 0 && srcElem1 != -1)
2290 fpscr.qc = 1;
2291 destElem = 0;
2292 } else if (imm) {
2293 BigElement mid = (srcElem1 >> (imm - 1));
2294 uint64_t rBit = mid & 0x1;
2295 mid >>= 1;
2296 mid |= -(mid & ((BigElement)1 <<
2297 (sizeof(BigElement) * 8 - 1 - imm)));
2298 mid += rBit;
2299 if (mid != (Element)mid) {
2300 destElem = mask(sizeof(Element) * 8 - 1);
2301 if (srcElem1 < 0)
2302 destElem = ~destElem;
2303 fpscr.qc = 1;
2304 } else {
2305 destElem = mid;
2306 }
2307 } else {
2308 if (srcElem1 != (Element)srcElem1) {
2309 destElem = mask(sizeof(Element) * 8 - 1);
2310 if (srcElem1 < 0)
2311 destElem = ~destElem;
2312 fpscr.qc = 1;
2313 } else {
2314 destElem = srcElem1;
2315 }
2316 }
2317 FpscrQc = fpscr;
2318 '''
2319 twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes,
2320 sqrshrnCode, hasImm=True)
2321 twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes,
2322 sqrshrnCode, hasImm=True, hi=True)
2323 twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes,
2324 sqrshrnCode, hasImm=True, scalar=True)
2325 # SQRSHRUN, SQRSHRUN2
2326 sqrshrunCode = '''
2327 FPSCR fpscr = (FPSCR) FpscrQc;
2328 if (imm > sizeof(srcElem1) * 8) {
2329 if (srcElem1 != 0)
2330 fpscr.qc = 1;
2331 destElem = 0;
2332 } else if (imm) {
2333 BigElement mid = (srcElem1 >> (imm - 1));
2334 uint64_t rBit = mid & 0x1;
2335 mid >>= 1;
2336 mid |= -(mid & ((BigElement)1 <<
2337 (sizeof(BigElement) * 8 - 1 - imm)));
2338 mid += rBit;
2339 if (bits(mid, sizeof(BigElement) * 8 - 1,
2340 sizeof(Element) * 8) != 0) {
2341 if (srcElem1 < 0) {
2342 destElem = 0;
2343 } else {
2344 destElem = mask(sizeof(Element) * 8);
2345 }
2346 fpscr.qc = 1;
2347 } else {
2348 destElem = mid;
2349 }
2350 } else {
2351 if (srcElem1 < 0) {
2352 fpscr.qc = 1;
2353 destElem = 0;
2354 } else {
2355 destElem = srcElem1;
2356 }
2357 }
2358 FpscrQc = fpscr;
2359 '''
2360 twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes,
2361 sqrshrunCode, hasImm=True)
2362 twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp",
2363 smallSignedTypes, sqrshrunCode, hasImm=True, hi=True)
2364 twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp",
2365 smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True)
2366 # SQSHL (immediate)
2367 sqshlImmCode = '''
2368 FPSCR fpscr = (FPSCR) FpscrQc;
2369 if (imm >= sizeof(Element) * 8) {
2370 if (srcElem1 != 0) {
2371 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2371 destElem = std::numeric_limits<Element>::min();
2372 if (srcElem1 > 0)
2373 destElem = ~destElem;
2374 fpscr.qc = 1;
2375 } else {
2376 destElem = 0;
2377 }
2378 } else if (imm) {
2379 destElem = (srcElem1 << imm);
2380 uint64_t topBits = bits((uint64_t)srcElem1,
2381 sizeof(Element) * 8 - 1,
2382 sizeof(Element) * 8 - 1 - imm);
2383 if (topBits != 0 && topBits != mask(imm + 1)) {
2372 if (srcElem1 > 0)
2373 destElem = ~destElem;
2374 fpscr.qc = 1;
2375 } else {
2376 destElem = 0;
2377 }
2378 } else if (imm) {
2379 destElem = (srcElem1 << imm);
2380 uint64_t topBits = bits((uint64_t)srcElem1,
2381 sizeof(Element) * 8 - 1,
2382 sizeof(Element) * 8 - 1 - imm);
2383 if (topBits != 0 && topBits != mask(imm + 1)) {
2384 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2384 destElem = std::numeric_limits<Element>::min();
2385 if (srcElem1 > 0)
2386 destElem = ~destElem;
2387 fpscr.qc = 1;
2388 }
2389 } else {
2390 destElem = srcElem1;
2391 }
2392 FpscrQc = fpscr;
2393 '''
2394 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2,
2395 sqshlImmCode, hasImm=True)
2396 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4,
2397 sqshlImmCode, hasImm=True)
2398 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4,
2399 sqshlImmCode, hasImm=True, scalar=True)
2400 # SQSHL (register)
2401 sqshlCode = '''
2402 int16_t shiftAmt = (int8_t)srcElem2;
2403 FPSCR fpscr = (FPSCR) FpscrQc;
2404 if (shiftAmt < 0) {
2405 shiftAmt = -shiftAmt;
2406 if (shiftAmt >= sizeof(Element) * 8) {
2407 shiftAmt = sizeof(Element) * 8 - 1;
2408 destElem = 0;
2409 } else {
2410 destElem = (srcElem1 >> shiftAmt);
2411 }
2412 // Make sure the right shift sign extended when it should.
2413 if (srcElem1 < 0 && destElem >= 0) {
2414 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2415 1 - shiftAmt));
2416 }
2417 } else if (shiftAmt > 0) {
2418 bool sat = false;
2419 if (shiftAmt >= sizeof(Element) * 8) {
2420 if (srcElem1 != 0)
2421 sat = true;
2422 else
2423 destElem = 0;
2424 } else {
2425 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2426 sizeof(Element) * 8 - 1 - shiftAmt) !=
2427 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2428 sat = true;
2429 } else {
2430 destElem = srcElem1 << shiftAmt;
2431 }
2432 }
2433 if (sat) {
2434 fpscr.qc = 1;
2435 destElem = mask(sizeof(Element) * 8 - 1);
2436 if (srcElem1 < 0)
2437 destElem = ~destElem;
2438 }
2439 } else {
2440 destElem = srcElem1;
2441 }
2442 FpscrQc = fpscr;
2443 '''
2444 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2,
2445 sqshlCode)
2446 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4,
2447 sqshlCode)
2448 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4,
2449 sqshlCode, scalar=True)
2450 # SQSHLU
2451 sqshluCode = '''
2452 FPSCR fpscr = (FPSCR) FpscrQc;
2453 if (imm >= sizeof(Element) * 8) {
2454 if (srcElem1 < 0) {
2455 destElem = 0;
2456 fpscr.qc = 1;
2457 } else if (srcElem1 > 0) {
2458 destElem = mask(sizeof(Element) * 8);
2459 fpscr.qc = 1;
2460 } else {
2461 destElem = 0;
2462 }
2463 } else if (imm) {
2464 destElem = (srcElem1 << imm);
2465 uint64_t topBits = bits((uint64_t)srcElem1,
2466 sizeof(Element) * 8 - 1,
2467 sizeof(Element) * 8 - imm);
2468 if (srcElem1 < 0) {
2469 destElem = 0;
2470 fpscr.qc = 1;
2471 } else if (topBits != 0) {
2472 destElem = mask(sizeof(Element) * 8);
2473 fpscr.qc = 1;
2474 }
2475 } else {
2476 if (srcElem1 < 0) {
2477 fpscr.qc = 1;
2478 destElem = 0;
2479 } else {
2480 destElem = srcElem1;
2481 }
2482 }
2483 FpscrQc = fpscr;
2484 '''
2485 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2,
2486 sqshluCode, hasImm=True)
2487 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4,
2488 sqshluCode, hasImm=True)
2489 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4,
2490 sqshluCode, hasImm=True, scalar=True)
2491 # SQSHRN, SQSHRN2
2492 sqshrnCode = '''
2493 FPSCR fpscr = (FPSCR) FpscrQc;
2494 if (imm > sizeof(srcElem1) * 8) {
2495 if (srcElem1 != 0 && srcElem1 != -1)
2496 fpscr.qc = 1;
2497 destElem = 0;
2498 } else if (imm) {
2499 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2500 mid |= -(mid & ((BigElement)1 <<
2501 (sizeof(BigElement) * 8 - 1 - imm)));
2502 if (mid != (Element)mid) {
2503 destElem = mask(sizeof(Element) * 8 - 1);
2504 if (srcElem1 < 0)
2505 destElem = ~destElem;
2506 fpscr.qc = 1;
2507 } else {
2508 destElem = mid;
2509 }
2510 } else {
2511 destElem = srcElem1;
2512 }
2513 FpscrQc = fpscr;
2514 '''
2515 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes,
2516 sqshrnCode, hasImm=True)
2517 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes,
2518 sqshrnCode, hasImm=True, hi=True)
2519 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes,
2520 sqshrnCode, hasImm=True, scalar=True)
2521 # SQSHRUN, SQSHRUN2
2522 sqshrunCode = '''
2523 FPSCR fpscr = (FPSCR) FpscrQc;
2524 if (imm > sizeof(srcElem1) * 8) {
2525 if (srcElem1 != 0)
2526 fpscr.qc = 1;
2527 destElem = 0;
2528 } else if (imm) {
2529 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2530 if (bits(mid, sizeof(BigElement) * 8 - 1,
2531 sizeof(Element) * 8) != 0) {
2532 if (srcElem1 < 0) {
2533 destElem = 0;
2534 } else {
2535 destElem = mask(sizeof(Element) * 8);
2536 }
2537 fpscr.qc = 1;
2538 } else {
2539 destElem = mid;
2540 }
2541 } else {
2542 destElem = srcElem1;
2543 }
2544 FpscrQc = fpscr;
2545 '''
2546 twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes,
2547 sqshrunCode, hasImm=True)
2548 twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes,
2549 sqshrunCode, hasImm=True, hi=True)
2550 twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes,
2551 sqshrunCode, hasImm=True, scalar=True)
2552 # SQSUB
2553 sqsubCode = '''
2554 destElem = srcElem1 - srcElem2;
2555 FPSCR fpscr = (FPSCR) FpscrQc;
2556 bool negDest = (destElem < 0);
2557 bool negSrc1 = (srcElem1 < 0);
2558 bool posSrc2 = (srcElem2 >= 0);
2559 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2385 if (srcElem1 > 0)
2386 destElem = ~destElem;
2387 fpscr.qc = 1;
2388 }
2389 } else {
2390 destElem = srcElem1;
2391 }
2392 FpscrQc = fpscr;
2393 '''
2394 twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2,
2395 sqshlImmCode, hasImm=True)
2396 twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4,
2397 sqshlImmCode, hasImm=True)
2398 twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4,
2399 sqshlImmCode, hasImm=True, scalar=True)
2400 # SQSHL (register)
2401 sqshlCode = '''
2402 int16_t shiftAmt = (int8_t)srcElem2;
2403 FPSCR fpscr = (FPSCR) FpscrQc;
2404 if (shiftAmt < 0) {
2405 shiftAmt = -shiftAmt;
2406 if (shiftAmt >= sizeof(Element) * 8) {
2407 shiftAmt = sizeof(Element) * 8 - 1;
2408 destElem = 0;
2409 } else {
2410 destElem = (srcElem1 >> shiftAmt);
2411 }
2412 // Make sure the right shift sign extended when it should.
2413 if (srcElem1 < 0 && destElem >= 0) {
2414 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2415 1 - shiftAmt));
2416 }
2417 } else if (shiftAmt > 0) {
2418 bool sat = false;
2419 if (shiftAmt >= sizeof(Element) * 8) {
2420 if (srcElem1 != 0)
2421 sat = true;
2422 else
2423 destElem = 0;
2424 } else {
2425 if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1,
2426 sizeof(Element) * 8 - 1 - shiftAmt) !=
2427 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2428 sat = true;
2429 } else {
2430 destElem = srcElem1 << shiftAmt;
2431 }
2432 }
2433 if (sat) {
2434 fpscr.qc = 1;
2435 destElem = mask(sizeof(Element) * 8 - 1);
2436 if (srcElem1 < 0)
2437 destElem = ~destElem;
2438 }
2439 } else {
2440 destElem = srcElem1;
2441 }
2442 FpscrQc = fpscr;
2443 '''
2444 threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2,
2445 sqshlCode)
2446 threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4,
2447 sqshlCode)
2448 threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4,
2449 sqshlCode, scalar=True)
2450 # SQSHLU
2451 sqshluCode = '''
2452 FPSCR fpscr = (FPSCR) FpscrQc;
2453 if (imm >= sizeof(Element) * 8) {
2454 if (srcElem1 < 0) {
2455 destElem = 0;
2456 fpscr.qc = 1;
2457 } else if (srcElem1 > 0) {
2458 destElem = mask(sizeof(Element) * 8);
2459 fpscr.qc = 1;
2460 } else {
2461 destElem = 0;
2462 }
2463 } else if (imm) {
2464 destElem = (srcElem1 << imm);
2465 uint64_t topBits = bits((uint64_t)srcElem1,
2466 sizeof(Element) * 8 - 1,
2467 sizeof(Element) * 8 - imm);
2468 if (srcElem1 < 0) {
2469 destElem = 0;
2470 fpscr.qc = 1;
2471 } else if (topBits != 0) {
2472 destElem = mask(sizeof(Element) * 8);
2473 fpscr.qc = 1;
2474 }
2475 } else {
2476 if (srcElem1 < 0) {
2477 fpscr.qc = 1;
2478 destElem = 0;
2479 } else {
2480 destElem = srcElem1;
2481 }
2482 }
2483 FpscrQc = fpscr;
2484 '''
2485 twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2,
2486 sqshluCode, hasImm=True)
2487 twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4,
2488 sqshluCode, hasImm=True)
2489 twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4,
2490 sqshluCode, hasImm=True, scalar=True)
2491 # SQSHRN, SQSHRN2
2492 sqshrnCode = '''
2493 FPSCR fpscr = (FPSCR) FpscrQc;
2494 if (imm > sizeof(srcElem1) * 8) {
2495 if (srcElem1 != 0 && srcElem1 != -1)
2496 fpscr.qc = 1;
2497 destElem = 0;
2498 } else if (imm) {
2499 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2500 mid |= -(mid & ((BigElement)1 <<
2501 (sizeof(BigElement) * 8 - 1 - imm)));
2502 if (mid != (Element)mid) {
2503 destElem = mask(sizeof(Element) * 8 - 1);
2504 if (srcElem1 < 0)
2505 destElem = ~destElem;
2506 fpscr.qc = 1;
2507 } else {
2508 destElem = mid;
2509 }
2510 } else {
2511 destElem = srcElem1;
2512 }
2513 FpscrQc = fpscr;
2514 '''
2515 twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes,
2516 sqshrnCode, hasImm=True)
2517 twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes,
2518 sqshrnCode, hasImm=True, hi=True)
2519 twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes,
2520 sqshrnCode, hasImm=True, scalar=True)
# SQSHRUN, SQSHRUN2
# C++ snippet run per element: shifts srcElem1 right by imm and narrows it.
# If any bit above the Element width remains set, the result saturates to 0
# for a negative source or to the all-ones mask otherwise, and fpscr.qc is
# set.
sqshrunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (bits(mid, sizeof(BigElement) * 8 - 1,
                          sizeof(Element) * 8) != 0) {
                if (srcElem1 < 0) {
                    destElem = 0;
                } else {
                    destElem = mask(sizeof(Element) * 8);
                }
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("SqshrunX", {}),
                  ("Sqshrun2X", {"hi": True}),
                  ("SqshrunScX", {"scalar": True})):
    twoRegNarrowInstX("sqshrun", _cls, "SimdShiftOp", smallSignedTypes,
                      sqshrunCode, hasImm=True, **_kw)
# SQSUB
# C++ snippet run per element: signed saturating subtract. Overflow is
# detected from the operand/result signs; on overflow fpscr.qc is set and the
# result is clamped to the limit of Element on the side that overflowed.
sqsubCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            // A wrapped result that looks negative means the true result
            // overflowed upwards, so clamp to max(); otherwise clamp to
            // min(). Selecting the limit directly avoids shifting into the
            // sign bit and decrementing min(), both of which overflow a
            // signed type.
            destElem = negDest ? std::numeric_limits<Element>::max() :
                                 std::numeric_limits<Element>::min();
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4,
                   sqsubCode)
threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4,
                   sqsubCode, scalar=True)
# SQXTN, SQXTN2
# C++ snippet run per element: narrows srcElem1 into destElem. When the
# narrowed value does not compare equal to the source, fpscr.qc is set and
# destElem becomes the signed maximum, or its complement for a negative
# source.
sqxtnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if ((BigElement)destElem != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("SqxtnX", {}),
                  ("Sqxtn2X", {"hi": True}),
                  ("SqxtnScX", {"scalar": True})):
    twoRegNarrowInstX("sqxtn", _cls, "SimdMiscOp", smallSignedTypes,
                      sqxtnCode, **_kw)
# SQXTUN, SQXTUN2
# C++ snippet run per element: narrows a signed srcElem1 into destElem. A
# negative source, or one that does not round-trip through the Element-wide
# mask, sets fpscr.qc and yields the all-ones mask, complemented when the
# source is negative.
sqxtunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if (srcElem1 < 0 ||
                ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("SqxtunX", {}),
                  ("Sqxtun2X", {"hi": True}),
                  ("SqxtunScX", {"scalar": True})):
    twoRegNarrowInstX("sqxtun", _cls, "SimdMiscOp", smallSignedTypes,
                      sqxtunCode, **_kw)
# SRHADD
# C++ snippet run per element: rounding halving add. The low bits of both
# sources plus one form the carry; the halves of the masked sources are then
# summed with that carry.
rhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for _cls, _regs in (("SrhaddDX", 2), ("SrhaddQX", 4)):
    threeEqualRegInstX("srhadd", _cls, "SimdAddOp", smallSignedTypes, _regs,
                       rhaddCode)
# SRI
# C++ snippet run per element: shift right and insert. The source shifted
# right by imm replaces the low bits of destElem while the top imm bits of
# the previous destination value are preserved.
sriCode = '''
        // A shift by the full element width (or more) inserts no source
        // bits, so destElem is simply left untouched in that case.
        if (imm < sizeof(Element) * 8) {
            destElem = (srcElem1 >> imm) |
                       (destElem & ~mask(sizeof(Element) * 8 - imm));
        }
'''
twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode,
                 True, hasImm=True)
twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode,
                 True, hasImm=True)
# SRSHL
# C++ snippet run per element: rounding shift by a register amount. The
# shift amount is the low byte of srcElem2 read as signed; a negative amount
# shifts right and adds the rounding bit, a positive amount shifts left.
rshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
'''
for _cls, _regs in (("SrshlDX", 2), ("SrshlQX", 4)):
    threeEqualRegInstX("srshl", _cls, "SimdShiftOp", signedTypes, _regs,
                       rshlCode)
# SRSHR
# C++ snippet run per element: rounding shift right by imm. Bit (imm - 1) of
# the source is added to the shifted value; an imm larger than the element
# width produces 0 and an imm of 0 copies the source.
rshrCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
'''
for _cls, _regs in (("SrshrDX", 2), ("SrshrQX", 4)):
    twoEqualRegInstX("srshr", _cls, "SimdShiftOp", signedTypes, _regs,
                     rshrCode, hasImm=True)
# SRSRA
# C++ snippet run per element: rounding shift right by imm, accumulated into
# the existing destElem. An imm larger than the element width adds nothing.
rsraCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem += 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem += srcElem1;
        }
'''
for _cls, _regs in (("SrsraDX", 2), ("SrsraQX", 4)):
    twoEqualRegInstX("srsra", _cls, "SimdShiftOp", signedTypes, _regs,
                     rsraCode, True, hasImm=True)
# SSHL
# C++ snippet run per element: shift by a register amount. The shift amount
# is the low byte of srcElem2 read as signed; a negative amount shifts right
# (with explicit sign extension), otherwise the source is shifted left.
shlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
'''
for _cls, _regs in (("SshlDX", 2), ("SshlQX", 4)):
    threeEqualRegInstX("sshl", _cls, "SimdShiftOp", signedTypes, _regs,
                       shlCode)
# SSHLL, SSHLL2
# C++ snippet run per element: widens srcElem1 to BigElement and shifts it
# left by imm; an imm of at least the destination width produces 0.
shllCode = '''
        if (imm >= sizeof(destElem) * 8) {
            destElem = 0;
        } else {
            destElem = (BigElement)srcElem1 << imm;
        }
'''
for _cls, _kw in (("SshllX", {}), ("Sshll2X", {"hi": True})):
    twoRegLongInstX("sshll", _cls, "SimdShiftOp", smallSignedTypes,
                    shllCode, hasImm=True, **_kw)
# SSHR
# C++ snippet run per element: shift right by imm. When imm is at least the
# element width the result is -1 if ltz(srcElem1) holds and 0 otherwise.
shrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            if (ltz(srcElem1))
                destElem = -1;
            else
                destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
'''
for _cls, _regs in (("SshrDX", 2), ("SshrQX", 4)):
    twoEqualRegInstX("sshr", _cls, "SimdShiftOp", signedTypes, _regs, shrCode,
                     hasImm=True)
# SSRA
# C++ snippet run per element: shift right by imm and accumulate into the
# existing destElem. An imm of at least the element width contributes -1
# when ltz(srcElem1) holds and 0 otherwise.
sraCode = '''
        Element mid;
        if (imm >= sizeof(srcElem1) * 8) {
            mid = ltz(srcElem1) ? -1 : 0;
        } else {
            mid = srcElem1 >> imm;
            // Re-extend the sign if the shift did not propagate it.
            if (ltz(srcElem1) && !ltz(mid)) {
                mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
            }
        }
        destElem += mid;
'''
twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode,
                 True, hasImm=True)
twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode,
                 True, hasImm=True)
# SSUBL, SSUBL2 / SSUBW, SSUBW2
# Both the long and the wide forms subtract after widening each operand to
# BigElement, so they share one snippet.
sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;"
for _mnem, _cls, _kw in (("ssubl", "SsublX", {}),
                         ("ssubl2", "Ssubl2X", {"hi": True})):
    threeRegLongInstX(_mnem, _cls, "SimdAddOp", smallSignedTypes,
                      sublwCode, **_kw)
for _mnem, _cls, _kw in (("ssubw", "SsubwX", {}),
                         ("ssubw2", "Ssubw2X", {"hi": True})):
    threeRegWideInstX(_mnem, _cls, "SimdAddOp", smallSignedTypes,
                      sublwCode, **_kw)
# SUB
# Plain element-wise subtraction.
subCode = "destElem = srcElem1 - srcElem2;"
for _cls, _regs in (("SubDX", 2), ("SubQX", 4)):
    threeEqualRegInstX("sub", _cls, "SimdAddOp", unsignedTypes, _regs,
                       subCode)
# SUBHN, SUBHN2
# C++ snippet run per element: subtracts at BigElement width and keeps the
# bits above the Element width (the difference shifted right by the number
# of bits in Element).
subhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
'''
for _mnem, _cls, _kw in (("subhn", "SubhnX", {}),
                         ("subhn2", "Subhn2X", {"hi": True})):
    threeRegNarrowInstX(_mnem, _cls, "SimdAddOp", smallUnsignedTypes,
                        subhnCode, **_kw)
# SUQADD
# C++ snippet run per element: adds srcElem1 to the existing destElem, with
# the top bit of destElem selecting the branch. Results whose top bit ends
# up set (or that wrapped) saturate to the largest value with a clear top
# bit, and fpscr.qc is set.
suqaddCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        Element tmp = destElem + srcElem1;
        if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
            if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
                    tmp < srcElem1 || tmp < destElem) {
                destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        } else {
            Element absDestElem = (~destElem) + 1;
            if (absDestElem < srcElem1) {
                // Still check for positive sat., no need to check for negative sat.
                if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
                    destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
                    fpscr.qc = 1;
                } else {
                    destElem = tmp;
                }
            } else {
                destElem = tmp;
            }
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("SuqaddDX", smallUnsignedTypes, 2, {}),
        ("SuqaddQX", unsignedTypes, 4, {}),
        ("SuqaddScX", unsignedTypes, 4, {"scalar": True})):
    twoEqualRegInstX("suqadd", _cls, "SimdAddOp", _types, _regs,
                     suqaddCode, True, **_kw)
# SXTL -> alias to SSHLL
# TBL, TBX
# One class is registered per (table length 1..4, D/Q register form). TBL
# passes the string "true" and TBX the string "false" as the sixth argument
# of tbxTblInstX. Registration order is TBL first, then TBX, each walking
# lengths 1..4 with the D form before the Q form.
for _mnem, _stem, _flag in (("tbl", "Tbl", "true"), ("tbx", "Tbx", "false")):
    for _length in (1, 2, 3, 4):
        for _form, _regs in (("D", 2), ("Q", 4)):
            tbxTblInstX(_mnem, "%s%d%sX" % (_stem, _length, _form),
                        "SimdMiscOp", ("uint8_t",), _length, _flag, _regs)
# TRN1, TRN2
# C++ template (the %s is filled with "0" for TRN1 and "1" for TRN2):
# interleaves the selected element of every pair from srcReg1 and srcReg2
# into consecutive destination slots.
trnCode = '''
        unsigned part = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[2 * i] = srcReg1.elements[2 * i + part];
            destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part];
        }
'''
for _mnem, _stem, _part in (("trn1", "Trn1", "0"), ("trn2", "Trn2", "1")):
    threeRegScrambleInstX(_mnem, _stem + "DX", "SimdAluOp",
                          smallUnsignedTypes, 2, trnCode % _part)
    threeRegScrambleInstX(_mnem, _stem + "QX", "SimdAluOp",
                          unsignedTypes, 4, trnCode % _part)
# Unsigned absolute-difference and add-long families. Each reuses a snippet
# defined earlier in the file and is instantiated here over unsigned element
# types.

# UABA
for _cls, _regs in (("UabaDX", 2), ("UabaQX", 4)):
    threeEqualRegInstX("uaba", _cls, "SimdAddAccOp", smallUnsignedTypes,
                       _regs, abaCode, True)
# UABAL, UABAL2
for _mnem, _cls, _kw in (("uabal", "UabalX", {}),
                         ("uabal2", "Uabal2X", {"hi": True})):
    threeRegLongInstX(_mnem, _cls, "SimdAddAccOp", smallUnsignedTypes,
                      abalCode, True, **_kw)
# UABD
for _cls, _regs in (("UabdDX", 2), ("UabdQX", 4)):
    threeEqualRegInstX("uabd", _cls, "SimdAddOp", smallUnsignedTypes,
                       _regs, abdCode)
# UABDL, UABDL2
for _mnem, _cls, _kw in (("uabdl", "UabdlX", {}),
                         ("uabdl2", "Uabdl2X", {"hi": True})):
    threeRegLongInstX(_mnem, _cls, "SimdAddAccOp", smallUnsignedTypes,
                      abdlCode, True, **_kw)
# UADALP
for _cls, _regs in (("UadalpDX", 2), ("UadalpQX", 4)):
    twoRegCondenseInstX("uadalp", _cls, "SimdAddOp", smallUnsignedTypes,
                        _regs, adalpCode, True)
# UADDL, UADDL2
for _mnem, _cls, _kw in (("uaddl", "UaddlX", {}),
                         ("uaddl2", "Uaddl2X", {"hi": True})):
    threeRegLongInstX(_mnem, _cls, "SimdAddAccOp", smallUnsignedTypes,
                      addlwCode, **_kw)
# UADDLP
for _cls, _regs in (("UaddlpDX", 2), ("UaddlpQX", 4)):
    twoRegCondenseInstX("uaddlp", _cls, "SimdAddOp", smallUnsignedTypes,
                        _regs, addlwCode)
# UADDLV
for _cls, _types, _kw in (
        ("UaddlvDX", ("uint8_t", "uint16_t"), {}),
        ("UaddlvQX", ("uint8_t", "uint16_t"), {}),
        ("UaddlvBQX", ("uint32_t",), {"doubleDest": True})):
    # Only the first (D) form uses 2 registers; both Q forms use 4.
    _regs = 2 if _cls == "UaddlvDX" else 4
    twoRegAcrossInstX("uaddlv", _cls, "SimdAddOp", _types, _regs,
                      addAcrossLongCode, long=True, **_kw)
# UADDW
for _mnem, _cls, _kw in (("uaddw", "UaddwX", {}),
                         ("uaddw2", "Uaddw2X", {"hi": True})):
    threeRegWideInstX(_mnem, _cls, "SimdAddAccOp", smallUnsignedTypes,
                      addlwCode, **_kw)
# UCVTF (fixed-point)
# Formats the fpOp template with a call to fplibFixedToFP that passes imm as
# its second argument and `true` as its third.
ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true,"
                         " FPCRRounding(fpscr), fpscr)")
for _cls, _types, _regs, _kw in (
        ("UcvtfFixedDX", smallFloatTypes, 2, {}),
        ("UcvtfFixedQX", floatTypes, 4, {}),
        ("UcvtfFixedScX", floatTypes, 4, {"scalar": True})):
    twoEqualRegInstX("ucvtf", _cls, "SimdCvtOp", _types, _regs,
                     ucvtfFixedCode, hasImm=True, **_kw)
# UCVTF (integer)
# Same conversion call with a literal 0 in place of imm.
ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true,"
                       " FPCRRounding(fpscr), fpscr)")
for _cls, _types, _regs, _kw in (
        ("UcvtfIntDX", smallFloatTypes, 2, {}),
        ("UcvtfIntQX", floatTypes, 4, {}),
        ("UcvtfIntScX", floatTypes, 4, {"scalar": True})):
    twoEqualRegInstX("ucvtf", _cls, "SimdCvtOp", _types, _regs,
                     ucvtfIntCode, **_kw)
# Unsigned halving add/sub and max/min families. Each reuses a snippet
# defined earlier in the file, instantiated over unsigned element types.

# UHADD
for _cls, _regs in (("UhaddDX", 2), ("UhaddQX", 4)):
    threeEqualRegInstX("uhadd", _cls, "SimdAddOp", smallUnsignedTypes,
                       _regs, haddCode)
# UHSUB
for _cls, _regs in (("UhsubDX", 2), ("UhsubQX", 4)):
    threeEqualRegInstX("uhsub", _cls, "SimdAddOp", smallUnsignedTypes,
                       _regs, hsubCode)
# UMAX
for _cls, _regs in (("UmaxDX", 2), ("UmaxQX", 4)):
    threeEqualRegInstX("umax", _cls, "SimdCmpOp", smallUnsignedTypes,
                       _regs, maxCode)
# UMAXP
for _cls, _regs in (("UmaxpDX", 2), ("UmaxpQX", 4)):
    threeEqualRegInstX("umaxp", _cls, "SimdCmpOp", smallUnsignedTypes,
                       _regs, maxCode, pairwise=True)
# UMAXV
for _cls, _types, _regs in (("UmaxvDX", ("uint8_t", "uint16_t"), 2),
                            ("UmaxvQX", smallUnsignedTypes, 4)):
    twoRegAcrossInstX("umaxv", _cls, "SimdCmpOp", _types, _regs,
                      maxAcrossCode)
# UMIN
for _cls, _regs in (("UminDX", 2), ("UminQX", 4)):
    threeEqualRegInstX("umin", _cls, "SimdCmpOp", smallUnsignedTypes,
                       _regs, minCode)
# UMINP
for _cls, _regs in (("UminpDX", 2), ("UminpQX", 4)):
    threeEqualRegInstX("uminp", _cls, "SimdCmpOp", smallUnsignedTypes,
                       _regs, minCode, pairwise=True)
# UMINV
for _cls, _types, _regs in (("UminvDX", ("uint8_t", "uint16_t"), 2),
                            ("UminvQX", smallUnsignedTypes, 4)):
    twoRegAcrossInstX("uminv", _cls, "SimdCmpOp", _types, _regs,
                      minAcrossCode)
# Unsigned multiply-long families and UMOV. Each reuses a snippet defined
# earlier in the file; the "2" classes pass hi=True.

# UMLAL (by element)
for _cls, _kw in (("UmlalElemX", {}), ("UmlalElem2X", {"hi": True})):
    threeRegLongInstX("umlal", _cls, "SimdMultAccOp", smallUnsignedTypes,
                      mlalCode, True, byElem=True, **_kw)
# UMLAL (vector)
for _cls, _kw in (("UmlalX", {}), ("Umlal2X", {"hi": True})):
    threeRegLongInstX("umlal", _cls, "SimdMultAccOp", smallUnsignedTypes,
                      mlalCode, True, **_kw)
# UMLSL (by element)
for _cls, _kw in (("UmlslElemX", {}), ("UmlslElem2X", {"hi": True})):
    threeRegLongInstX("umlsl", _cls, "SimdMultAccOp", smallUnsignedTypes,
                      mlslCode, True, byElem=True, **_kw)
# UMLSL (vector)
for _cls, _kw in (("UmlslX", {}), ("Umlsl2X", {"hi": True})):
    threeRegLongInstX("umlsl", _cls, "SimdMultAccOp", smallUnsignedTypes,
                      mlslCode, True, **_kw)
# UMOV
for _cls, _types, _width in (("UmovWX", smallUnsignedTypes, 'W'),
                             ("UmovXX", ("uint64_t",), 'X')):
    insToGprInstX("umov", _cls, "SimdMiscOp", _types, 4, _width)
# UMULL, UMULL2 (by element)
for _cls, _kw in (("UmullElemX", {}), ("UmullElem2X", {"hi": True})):
    threeRegLongInstX("umull", _cls, "SimdMultOp", smallUnsignedTypes,
                      mullCode, byElem=True, **_kw)
# UMULL, UMULL2 (vector)
for _cls, _kw in (("UmullX", {}), ("Umull2X", {"hi": True})):
    threeRegLongInstX("umull", _cls, "SimdMultOp", smallUnsignedTypes,
                      mullCode, **_kw)
# UQADD
# C++ snippet run per element: unsigned saturating add. A sum smaller than
# either operand indicates wrap-around; the result is then forced to all
# ones and fpscr.qc is set.
uqaddCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UqaddDX", smallUnsignedTypes, 2, {}),
        ("UqaddQX", unsignedTypes, 4, {}),
        ("UqaddScX", unsignedTypes, 4, {"scalar": True})):
    threeEqualRegInstX("uqadd", _cls, "SimdAddOp", _types, _regs,
                       uqaddCode, **_kw)
# UQRSHL
# C++ snippet run per element: unsigned saturating rounding shift by a
# register amount (low byte of srcElem2, read as signed). Negative amounts
# shift right and add the rounding bit; other amounts shift left, saturating
# to the all-ones mask and setting fpscr.qc when set bits would be lost.
uqrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            destElem += rBit;
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UqrshlDX", smallUnsignedTypes, 2, {}),
        ("UqrshlQX", unsignedTypes, 4, {}),
        ("UqrshlScX", unsignedTypes, 4, {"scalar": True})):
    threeEqualRegInstX("uqrshl", _cls, "SimdCmpOp", _types, _regs,
                       uqrshlCode, **_kw)
# UQRSHRN
# C++ snippet run per element: unsigned saturating rounding shift right by
# imm followed by a narrowing store. Whenever the value does not survive
# the cast to Element, the result saturates to the all-ones mask and
# fpscr.qc is set.
uqrshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid += rBit;
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 != (Element)srcElem1) {
                // Unsigned saturation: use the full-width mask, matching
                // the shifted path above (not the signed maximum).
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
'''
twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes,
                  uqrshrnCode, hasImm=True)
twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp",
                  smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True)
twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp",
                  smallUnsignedTypes, uqrshrnCode, hasImm=True,
                  scalar=True)
# UQSHL (immediate)
# C++ snippet run per element: unsigned saturating shift left by imm. If any
# of the top imm bits of the source are set (or imm covers the whole element
# and the source is non-zero), the result saturates to the all-ones mask and
# fpscr.qc is set.
uqshlImmCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UqshlImmDX", smallUnsignedTypes, 2, {}),
        ("UqshlImmQX", unsignedTypes, 4, {}),
        ("UqshlImmScX", unsignedTypes, 4, {"scalar": True})):
    twoEqualRegInstX("uqshl", _cls, "SimdAluOp", _types, _regs,
                     uqshlImmCode, hasImm=True, **_kw)
# UQSHL (register)
# C++ snippet run per element: unsigned saturating shift by a register
# amount (low byte of srcElem2, read as signed). Negative amounts shift
# right; positive amounts shift left, saturating to the all-ones mask and
# setting fpscr.qc when set bits would be lost; zero copies the source.
uqshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UqshlDX", smallUnsignedTypes, 2, {}),
        ("UqshlQX", unsignedTypes, 4, {}),
        ("UqshlScX", unsignedTypes, 4, {"scalar": True})):
    threeEqualRegInstX("uqshl", _cls, "SimdAluOp", _types, _regs,
                       uqshlCode, **_kw)
# UQSHRN, UQSHRN2
# C++ snippet run per element: unsigned saturating shift right by imm
# followed by a narrowing store. A shifted value that does not survive the
# cast to Element saturates to the all-ones mask and sets fpscr.qc.
uqshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _mnem, _cls, _kw in (("uqshrn", "UqshrnX", {}),
                         ("uqshrn2", "Uqshrn2X", {"hi": True}),
                         ("uqshrn", "UqshrnScX", {"scalar": True})):
    twoRegNarrowInstX(_mnem, _cls, "SimdShiftOp", smallUnsignedTypes,
                      uqshrnCode, hasImm=True, **_kw)
# UQSUB
# C++ snippet run per element: unsigned saturating subtract. A difference
# larger than the minuend indicates wrap-around; the result is then forced
# to 0 and fpscr.qc is set.
uqsubCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UqsubDX", smallUnsignedTypes, 2, {}),
        ("UqsubQX", unsignedTypes, 4, {}),
        ("UqsubScX", unsignedTypes, 4, {"scalar": True})):
    threeEqualRegInstX("uqsub", _cls, "SimdAddOp", _types, _regs,
                       uqsubCode, **_kw)
# UQXTN
# C++ snippet run per element: narrows srcElem1 into destElem. When the
# narrowed value does not compare equal to the source, fpscr.qc is set and
# destElem becomes the all-ones mask.
uqxtnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if ((BigElement)destElem != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8);
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("UqxtnX", {}),
                  ("Uqxtn2X", {"hi": True}),
                  ("UqxtnScX", {"scalar": True})):
    twoRegNarrowInstX("uqxtn", _cls, "SimdMiscOp", smallUnsignedTypes,
                      uqxtnCode, **_kw)
# Unsigned estimate, rounding and shift families. Apart from the two
# estimate one-liners defined here, every snippet is shared with the signed
# variant of the same instruction and instantiated over unsigned types.

# URECPE
urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);"
for _cls, _regs in (("UrecpeDX", 2), ("UrecpeQX", 4)):
    twoEqualRegInstX("urecpe", _cls, "SimdMultAccOp", ("uint32_t",), _regs,
                     urecpeCode)
# URHADD
for _cls, _regs in (("UrhaddDX", 2), ("UrhaddQX", 4)):
    threeEqualRegInstX("urhadd", _cls, "SimdAddOp", smallUnsignedTypes,
                       _regs, rhaddCode)
# URSHL
for _cls, _regs in (("UrshlDX", 2), ("UrshlQX", 4)):
    threeEqualRegInstX("urshl", _cls, "SimdShiftOp", unsignedTypes, _regs,
                       rshlCode)
# URSHR
for _cls, _regs in (("UrshrDX", 2), ("UrshrQX", 4)):
    twoEqualRegInstX("urshr", _cls, "SimdShiftOp", unsignedTypes, _regs,
                     rshrCode, hasImm=True)
# URSQRTE
ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);"
for _cls, _regs in (("UrsqrteDX", 2), ("UrsqrteQX", 4)):
    twoEqualRegInstX("ursqrte", _cls, "SimdSqrtOp", ("uint32_t",), _regs,
                     ursqrteCode)
# URSRA
for _cls, _regs in (("UrsraDX", 2), ("UrsraQX", 4)):
    twoEqualRegInstX("ursra", _cls, "SimdShiftOp", unsignedTypes, _regs,
                     rsraCode, True, hasImm=True)
# USHL
for _cls, _regs in (("UshlDX", 2), ("UshlQX", 4)):
    threeEqualRegInstX("ushl", _cls, "SimdShiftOp", unsignedTypes, _regs,
                       shlCode)
# USHLL, USHLL2
for _cls, _kw in (("UshllX", {}), ("Ushll2X", {"hi": True})):
    twoRegLongInstX("ushll", _cls, "SimdShiftOp", smallUnsignedTypes,
                    shllCode, hasImm=True, **_kw)
# USHR
for _cls, _regs in (("UshrDX", 2), ("UshrQX", 4)):
    twoEqualRegInstX("ushr", _cls, "SimdShiftOp", unsignedTypes, _regs,
                     shrCode, hasImm=True)
# USQADD
# C++ snippet run per element: adds srcElem1 to the existing destElem, with
# the top bit of srcElem1 selecting the branch. With the top bit clear a
# wrapped sum saturates to all ones; with it set, a negated source larger
# than destElem saturates to 0. fpscr.qc is set on either saturation.
usqaddCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        Element tmp = destElem + srcElem1;
        if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) {
            if (tmp < srcElem1 || tmp < destElem) {
                destElem = (Element)(-1);
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        } else {
            Element absSrcElem1 = (~srcElem1) + 1;
            if (absSrcElem1 > destElem) {
                destElem = 0;
                fpscr.qc = 1;
            } else {
                destElem = tmp;
            }
        }
        FpscrQc = fpscr;
'''
for _cls, _types, _regs, _kw in (
        ("UsqaddDX", smallUnsignedTypes, 2, {}),
        ("UsqaddQX", unsignedTypes, 4, {}),
        ("UsqaddScX", unsignedTypes, 4, {"scalar": True})):
    twoEqualRegInstX("usqadd", _cls, "SimdAddOp", _types, _regs,
                     usqaddCode, True, **_kw)
# USRA
# Reuses the snippet shared with SSRA, instantiated over unsigned types.
for _cls, _regs in (("UsraDX", 2), ("UsraQX", 4)):
    twoEqualRegInstX("usra", _cls, "SimdShiftOp", unsignedTypes, _regs,
                     sraCode, True, hasImm=True)
# USUBL, USUBL2
for _mnem, _cls, _kw in (("usubl", "UsublX", {}),
                         ("usubl2", "Usubl2X", {"hi": True})):
    threeRegLongInstX(_mnem, _cls, "SimdAddOp", smallUnsignedTypes,
                      sublwCode, **_kw)
# USUBW, USUBW2
for _mnem, _cls, _kw in (("usubw", "UsubwX", {}),
                         ("usubw2", "Usubw2X", {"hi": True})):
    threeRegWideInstX(_mnem, _cls, "SimdAddOp", smallUnsignedTypes,
                      sublwCode, **_kw)
# UXTL -> alias to USHLL
# UZP1, UZP2
# C++ template (the %s is filled with "0" for UZP1 and "1" for UZP2): takes
# the selected element of every pair, first from srcReg1 into the lower half
# of the destination and then from srcReg2 into the upper half.
uzpCode = '''
        unsigned part = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[i] = srcReg1.elements[2 * i + part];
            destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part];
        }
'''
# Mnemonics are lower case, like every other instruction registered in this
# file.
threeRegScrambleInstX("uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2,
                      uzpCode % "0")
threeRegScrambleInstX("uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4,
                      uzpCode % "0")
# UZP2
threeRegScrambleInstX("uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2,
                      uzpCode % "1")
threeRegScrambleInstX("uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4,
                      uzpCode % "1")
# XTN, XTN2
# Plain narrowing move: the assignment truncates srcElem1 to Element.
# The mnemonic is lower case, like every other instruction registered in
# this file.
xtnCode = "destElem = srcElem1;"
twoRegNarrowInstX("xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode)
twoRegNarrowInstX("xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes,
                  xtnCode, hi=True)
# ZIP1, ZIP2
# C++ template (the %s is the first source index: "0" for ZIP1 and
# "eCount / 2" for ZIP2): interleaves eCount / 2 consecutive elements of
# srcReg1 and srcReg2, starting at that index, into the destination.
zipCode = '''
        unsigned base = %s;
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[2 * i] = srcReg1.elements[base + i];
            destReg.elements[2 * i + 1] = srcReg2.elements[base + i];
        }
'''
for _mnem, _stem, _base in (("zip1", "Zip1", "0"),
                            ("zip2", "Zip2", "eCount / 2")):
    threeRegScrambleInstX(_mnem, _stem + "DX", "SimdAluOp",
                          smallUnsignedTypes, 2, zipCode % _base)
    threeRegScrambleInstX(_mnem, _stem + "QX", "SimdAluOp",
                          unsignedTypes, 4, zipCode % _base)
3359
# For every decoder flavour, append a C++ class named <flavour>Decoder to
# header_output containing one alias template per entry of its type
# dictionary, mapping the alias name to the named class template.
# dict.items() is used (rather than iteritems()) so the loop runs under both
# Python 2 and Python 3, and the loop variable avoids shadowing the builtin
# `type`.
for decoderFlavour, type_dict in decoders.items():
    header_output += '''
    class %(decoder_flavour)sDecoder {
      public:
    ''' % { "decoder_flavour" : decoderFlavour }
    for alias_name, name in type_dict.items():
        header_output += '''
        template<typename Elem> using %(type)s = %(new_name)s<Elem>;''' % {
            "type" : alias_name, "new_name" : name
        }
    header_output += '''
    };'''
3372}};
2561 if (negDest)
2562 destElem -= 1;
2563 fpscr.qc = 1;
2564 }
2565 FpscrQc = fpscr;
2566 '''
2567 threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2,
2568 sqsubCode)
2569 threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4,
2570 sqsubCode)
2571 threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4,
2572 sqsubCode, scalar=True)
# SQXTN, SQXTN2
# C++ snippet run per element: narrows srcElem1 into destElem. When the
# narrowed value does not compare equal to the source, fpscr.qc is set and
# destElem becomes the signed maximum, or its complement for a negative
# source.
sqxtnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if ((BigElement)destElem != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("SqxtnX", {}),
                  ("Sqxtn2X", {"hi": True}),
                  ("SqxtnScX", {"scalar": True})):
    twoRegNarrowInstX("sqxtn", _cls, "SimdMiscOp", smallSignedTypes,
                      sqxtnCode, **_kw)
# SQXTUN, SQXTUN2
# C++ snippet run per element: narrows a signed srcElem1 into destElem. A
# negative source, or one that does not round-trip through the Element-wide
# mask, sets fpscr.qc and yields the all-ones mask, complemented when the
# source is negative.
sqxtunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = srcElem1;
        if (srcElem1 < 0 ||
                ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
        FpscrQc = fpscr;
'''
# Lower-half, upper-half (hi) and scalar variants share the same snippet.
for _cls, _kw in (("SqxtunX", {}),
                  ("Sqxtun2X", {"hi": True}),
                  ("SqxtunScX", {"scalar": True})):
    twoRegNarrowInstX("sqxtun", _cls, "SimdMiscOp", smallSignedTypes,
                      sqxtunCode, **_kw)
2610 # SRHADD: rhaddCode halves each operand separately (low bit masked off first) and adds carryBit, computed as (low bit of srcElem1 + low bit of srcElem2 + 1) >> 1.
2611 rhaddCode = '''
2612 Element carryBit =
2613 (((unsigned)srcElem1 & 0x1) +
2614 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2615 // Use division instead of a shift to ensure the sign extension works
2616 // right. The compiler will figure out if it can be a shift. Mask the
2617 // inputs so they get truncated correctly.
2618 destElem = (((srcElem1 & ~(Element)1) / 2) +
2619 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2620 '''
2621 threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2,
2622 rhaddCode)
2623 threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4,
2624 rhaddCode)
2625 # SRI: keep destElem when imm >= element width; otherwise OR (srcElem1 >> imm) with the destElem bits that lie outside mask(width - imm).
2626 sriCode = '''
2627 if (imm >= sizeof(Element) * 8)
2628 destElem = destElem;
2629 else
2630 destElem = (srcElem1 >> imm) |
2631 (destElem & ~mask(sizeof(Element) * 8 - imm));
2632 '''
2633 twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode,
2634 True, hasImm=True)  # positional True: presumably "read destination", since sriCode reads destElem -- confirm against twoEqualRegInstX
2635 twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode,
2636 True, hasImm=True)
2637 # SRSHL: shift amount is the low byte of srcElem2 read as int8_t; negative amounts shift right and add the rounding bit rBit, positive amounts shift left (0 once the amount >= width), zero copies srcElem1.
2638 rshlCode = '''
2639 int16_t shiftAmt = (int8_t)srcElem2;
2640 if (shiftAmt < 0) {
2641 shiftAmt = -shiftAmt;
2642 Element rBit = 0;
2643 if (shiftAmt <= sizeof(Element) * 8)
2644 rBit = bits(srcElem1, shiftAmt - 1);
2645 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2646 rBit = 1;
2647 if (shiftAmt >= sizeof(Element) * 8) {
2648 shiftAmt = sizeof(Element) * 8 - 1;
2649 destElem = 0;
2650 } else {
2651 destElem = (srcElem1 >> shiftAmt);
2652 }
2653 // Make sure the right shift sign extended when it should.
2654 if (ltz(srcElem1) && !ltz(destElem)) {
2655 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2656 1 - shiftAmt));
2657 }
2658 destElem += rBit;
2659 } else if (shiftAmt > 0) {
2660 if (shiftAmt >= sizeof(Element) * 8) {
2661 destElem = 0;
2662 } else {
2663 destElem = srcElem1 << shiftAmt;
2664 }
2665 } else {
2666 destElem = srcElem1;
2667 }
2668 '''
2669 threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2,
2670 rshlCode)
2671 threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4,
2672 rshlCode)
2673 # SRSHR: 0 when imm exceeds the width of srcElem1; for non-zero imm, srcElem1 >> imm (done as two shifts) plus bit imm - 1 of srcElem1; imm == 0 copies srcElem1.
2674 rshrCode = '''
2675 if (imm > sizeof(srcElem1) * 8) {
2676 destElem = 0;
2677 } else if (imm) {
2678 Element rBit = bits(srcElem1, imm - 1);
2679 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2680 } else {
2681 destElem = srcElem1;
2682 }
2683 '''
2684 twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2,
2685 rshrCode, hasImm=True)
2686 twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4,
2687 rshrCode, hasImm=True)
2688 # SRSRA: accumulates into destElem: nothing when imm exceeds the width of srcElem1, srcElem1 itself when imm == 0, otherwise srcElem1 >> imm (two shifts) plus bit imm - 1 of srcElem1.
2689 rsraCode = '''
2690 if (imm > sizeof(srcElem1) * 8) {
2691 destElem += 0;
2692 } else if (imm) {
2693 Element rBit = bits(srcElem1, imm - 1);
2694 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2695 } else {
2696 destElem += srcElem1;
2697 }
2698 '''
2699 twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2,
2700 rsraCode, True, hasImm=True)  # positional True: presumably "read destination", since rsraCode uses destElem += -- confirm against twoEqualRegInstX
2701 twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4,
2702 rsraCode, True, hasImm=True)
2703 # SSHL: shift amount is the low byte of srcElem2 read as int8_t; negative amounts shift right with an explicit sign fill when ltz(srcElem1), other amounts shift left; amounts >= width give 0 before any sign fill.
2704 shlCode = '''
2705 int16_t shiftAmt = (int8_t)srcElem2;
2706 if (shiftAmt < 0) {
2707 shiftAmt = -shiftAmt;
2708 if (shiftAmt >= sizeof(Element) * 8) {
2709 shiftAmt = sizeof(Element) * 8 - 1;
2710 destElem = 0;
2711 } else {
2712 destElem = (srcElem1 >> shiftAmt);
2713 }
2714 // Make sure the right shift sign extended when it should.
2715 if (ltz(srcElem1) && !ltz(destElem)) {
2716 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2717 1 - shiftAmt));
2718 }
2719 } else {
2720 if (shiftAmt >= sizeof(Element) * 8) {
2721 destElem = 0;
2722 } else {
2723 destElem = srcElem1 << shiftAmt;
2724 }
2725 }
2726 '''
2727 threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2,
2728 shlCode)
2729 threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4,
2730 shlCode)
2731 # SSHLL, SSHLL2: cast srcElem1 to BigElement and shift left by imm; 0 when imm >= the width of destElem.
2732 shllCode = '''
2733 if (imm >= sizeof(destElem) * 8) {
2734 destElem = 0;
2735 } else {
2736 destElem = (BigElement)srcElem1 << imm;
2737 }
2738 '''
2739 twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes,
2740 shllCode, hasImm=True)
2741 twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes,
2742 shllCode, hasImm=True, hi=True)  # "2" class: same code, registered with hi=True
2743 # SSHR: srcElem1 >> imm; when imm >= the width of srcElem1 the result is -1 if ltz(srcElem1), else 0.
2744 shrCode = '''
2745 if (imm >= sizeof(srcElem1) * 8) {
2746 if (ltz(srcElem1))
2747 destElem = -1;
2748 else
2749 destElem = 0;
2750 } else {
2751 destElem = srcElem1 >> imm;
2752 }
2753 '''
2754 twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode,
2755 hasImm=True)
2756 twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode,
2757 hasImm=True)
2758 # SSRA
2759 sraCode = '''
2760 Element mid;;
2761 if (imm >= sizeof(srcElem1) * 8) {
2762 mid = ltz(srcElem1) ? -1 : 0;
2763 } else {
2764 mid = srcElem1 >> imm;
2765 if (ltz(srcElem1) && !ltz(mid)) {
2766 mid |= -(mid & ((Element)1 <<
2767 (sizeof(Element) * 8 - 1 - imm)));
2768 }
2769 }
2770 destElem += mid;
2771 '''
2772 twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode,
2773 True, hasImm=True)
2774 twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode,
2775 True, hasImm=True)
2776 # SSUBL: both operands are cast to BigElement before subtracting.
2777 sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;"
2778 threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes,
2779 sublwCode)
2780 threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes,
2781 sublwCode, hi=True)
2782 # SSUBW: same sublwCode, registered through threeRegWideInstX instead of threeRegLongInstX.
2783 threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes,
2784 sublwCode)
2785 threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes,
2786 sublwCode, hi=True)
2787 # SUB: plain element-wise subtraction, registered for the unsigned type lists.
2788 subCode = "destElem = srcElem1 - srcElem2;"
2789 threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode)
2790 threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode)
2791 # SUBHN, SUBHN2: subtract as BigElement, then shift the difference right by sizeof(Element) * 8 bits.
2792 subhnCode = '''
2793 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2794 (sizeof(Element) * 8);
2795 '''
2796 threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes,
2797 subhnCode)
2798 threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes,
2799 subhnCode, hi=True)
2800 # SUQADD: tmp = destElem + srcElem1; branches on the top bit of destElem and, where overflow is detected, saturates to (1 << (bits - 1)) - 1 and sets fpscr.qc; otherwise stores tmp.
2801 suqaddCode = '''
2802 FPSCR fpscr = (FPSCR) FpscrQc;
2803 Element tmp = destElem + srcElem1;
2804 if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
2805 if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
2806 tmp < srcElem1 || tmp < destElem) {
2807 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
2808 fpscr.qc = 1;
2809 } else {
2810 destElem = tmp;
2811 }
2812 } else {
2813 Element absDestElem = (~destElem) + 1;
2814 if (absDestElem < srcElem1) {
2815 // Still check for positive sat., no need to check for negative sat.
2816 if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
2817 destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
2818 fpscr.qc = 1;
2819 } else {
2820 destElem = tmp;
2821 }
2822 } else {
2823 destElem = tmp;
2824 }
2825 }
2826 FpscrQc = fpscr;
2827 '''
2828 twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
2829 suqaddCode, True)  # registered with the unsigned type lists; the code tests the top bit explicitly instead of relying on a signed Element
2830 twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4,
2831 suqaddCode, True)
2832 twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4,
2833 suqaddCode, True, scalar=True)
2834 # SXTL -> alias to SSHLL
2835 # TBL: Tbl<N>DX / Tbl<N>QX for N = 1..4, uint8_t only, passing N and the string "true"; presumably N is the table register count and "true" selects zeroing of out-of-range lanes -- confirm in tbxTblInstX.
2836 tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2)
2837 tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4)
2838 tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2)
2839 tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4)
2840 tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2)
2841 tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4)
2842 tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2)
2843 tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4)
2844 # TBX: same eight registrations as TBL, but passing the string "false".
2845 tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2)
2846 tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4)
2847 tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2)
2848 tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4)
2849 tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2)
2850 tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4)
2851 tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2)
2852 tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4)
2853 # TRN1: for i < eCount / 2, dest[2i] = srcReg1[2i + part] and dest[2i + 1] = srcReg2[2i + part]; the %s slot supplies part, "0" here.
2854 trnCode = '''
2855 unsigned part = %s;
2856 for (unsigned i = 0; i < eCount / 2; i++) {
2857 destReg.elements[2 * i] = srcReg1.elements[2 * i + part];
2858 destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part];
2859 }
2860 '''
2861 threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2,
2862 trnCode % "0")
2863 threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4,
2864 trnCode % "0")
2865 # TRN2: same template with part = "1".
2866 threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2,
2867 trnCode % "1")
2868 threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4,
2869 trnCode % "1")
2870 # UABA: unsigned registration of abaCode (defined outside this section); the positional True is presumably "read destination" -- confirm.
2871 threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2,
2872 abaCode, True)
2873 threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4,
2874 abaCode, True)
2875 # UABAL, UABAL2: unsigned registration of abalCode through threeRegLongInstX.
2876 threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes,
2877 abalCode, True)
2878 threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes,
2879 abalCode, True, hi=True)
2880 # UABD: unsigned registration of abdCode.
2881 threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2,
2882 abdCode)
2883 threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4,
2884 abdCode)
2885 # UABDL, UABDL2: unsigned registration of abdlCode. NOTE(review): uses SimdAddAccOp and a positional True, unlike the UABD registration -- confirm this is intended.
2886 threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes,
2887 abdlCode, True)
2888 threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes,
2889 abdlCode, True, hi=True)
2890 # UADALP: unsigned registration of adalpCode through twoRegCondenseInstX.
2891 twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes,
2892 2, adalpCode, True)
2893 twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes,
2894 4, adalpCode, True)
2895 # UADDL, UADDL2: unsigned registration of addlwCode through threeRegLongInstX.
2896 threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes,
2897 addlwCode)
2898 threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes,
2899 addlwCode, hi=True)
2900 # UADDLP: the same addlwCode, registered through twoRegCondenseInstX.
2901 twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes,
2902 2, addlwCode)
2903 twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes,
2904 4, addlwCode)
2905 # UADDLV: addAcrossLongCode with long=True; the uint32_t case is a separate class (UaddlvBQX) that also passes doubleDest=True.
2906 twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp",
2907 ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True)
2908 twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp",
2909 ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True)
2910 twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4,
2911 addAcrossLongCode, doubleDest=True, long=True)
2912 # UADDW: the same addlwCode, registered through threeRegWideInstX.
2913 threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes,
2914 addlwCode)
2915 threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes,
2916 addlwCode, hi=True)
2917 # UCVTF (fixed-point): the fpOp template is filled with a fplibFixedToFP<Element> call that passes imm as its second argument and true as its third.
2918 ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true,"
2919 " FPCRRounding(fpscr), fpscr)")
2920 twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2,
2921 ucvtfFixedCode, hasImm=True)
2922 twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4,
2923 ucvtfFixedCode, hasImm=True)
2924 twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4,
2925 ucvtfFixedCode, hasImm=True, scalar=True)
2926 # UCVTF (integer): the same fplibFixedToFP<Element> call with 0 in place of imm, so no hasImm.
2927 ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true,"
2928 " FPCRRounding(fpscr), fpscr)")
2929 twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2,
2930 ucvtfIntCode)
2931 twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4,
2932 ucvtfIntCode)
2933 twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4,
2934 ucvtfIntCode, scalar=True)
2935 # UHADD: unsigned registration of haddCode (defined outside this section).
2936 threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2,
2937 haddCode)
2938 threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4,
2939 haddCode)
2940 # UHSUB: unsigned registration of hsubCode.
2941 threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2,
2942 hsubCode)
2943 threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4,
2944 hsubCode)
2945 # UMAX: unsigned registration of maxCode.
2946 threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2,
2947 maxCode)
2948 threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4,
2949 maxCode)
2950 # UMAXP: the same maxCode, registered with pairwise=True.
2951 threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2,
2952 maxCode, pairwise=True)
2953 threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4,
2954 maxCode, pairwise=True)
2955 # UMAXV: maxAcrossCode through twoRegAcrossInstX; the DX class is limited to uint8_t and uint16_t.
2956 twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
2957 2, maxAcrossCode)
2958 twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4,
2959 maxAcrossCode)
2960 # UMIN: unsigned registration of minCode.
2961 threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2,
2962 minCode)
2963 threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4,
2964 minCode)
2965 # UMINP: the same minCode, registered with pairwise=True.
2966 threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2,
2967 minCode, pairwise=True)
2968 threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4,
2969 minCode, pairwise=True)
2970 # UMINV: minAcrossCode through twoRegAcrossInstX; the DX class is limited to uint8_t and uint16_t.
2971 twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"),
2972 2, minAcrossCode)
2973 twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4,
2974 minAcrossCode)
2975 # UMLAL (by element): unsigned registration of mlalCode (defined outside this section) with byElem=True.
2976 threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp",
2977 smallUnsignedTypes, mlalCode, True, byElem=True)
2978 threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp",
2979 smallUnsignedTypes, mlalCode, True, byElem=True, hi=True)
2980 # UMLAL (vector): the same mlalCode without byElem.
2981 threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes,
2982 mlalCode, True)
2983 threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes,
2984 mlalCode, True, hi=True)
2985 # UMLSL (by element): unsigned registration of mlslCode with byElem=True.
2986 threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp",
2987 smallUnsignedTypes, mlslCode, True, byElem=True)
2988 threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp",
2989 smallUnsignedTypes, mlslCode, True, byElem=True, hi=True)
2990 # UMLSL (vector): the same mlslCode without byElem.
2991 threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes,
2992 mlslCode, True)
2993 threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes,
2994 mlslCode, True, hi=True)
2995 # UMOV: registered through insToGprInstX; the 'W' class takes smallUnsignedTypes, the 'X' class only uint64_t.
2996 insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W')
2997 insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X')
2998 # UMULL, UMULL2 (by element): unsigned registration of mullCode with byElem=True.
2999 threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes,
3000 mullCode, byElem=True)
3001 threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes,
3002 mullCode, byElem=True, hi=True)
3003 # UMULL, UMULL2 (vector): the same mullCode without byElem.
3004 threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes,
3005 mullCode)
3006 threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes,
3007 mullCode, hi=True)
3008 # UQADD: wrap-around add; if the sum is below either operand, set destElem to (Element)(-1) and fpscr.qc = 1.
3009 uqaddCode = '''
3010 destElem = srcElem1 + srcElem2;
3011 FPSCR fpscr = (FPSCR) FpscrQc;
3012 if (destElem < srcElem1 || destElem < srcElem2) {
3013 destElem = (Element)(-1);
3014 fpscr.qc = 1;
3015 }
3016 FpscrQc = fpscr;
3017 '''
3018 threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
3019 uqaddCode)
3020 threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4,
3021 uqaddCode)
3022 threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4,
3023 uqaddCode, scalar=True)
3024 # UQRSHL
3025 uqrshlCode = '''
3026 int16_t shiftAmt = (int8_t)srcElem2;
3027 FPSCR fpscr = (FPSCR) FpscrQc;
3028 if (shiftAmt < 0) {
3029 shiftAmt = -shiftAmt;
3030 Element rBit = 0;
3031 if (shiftAmt <= sizeof(Element) * 8)
3032 rBit = bits(srcElem1, shiftAmt - 1);
3033 if (shiftAmt >= sizeof(Element) * 8) {
3034 shiftAmt = sizeof(Element) * 8 - 1;
3035 destElem = 0;
3036 } else {
3037 destElem = (srcElem1 >> shiftAmt);
3038 }
3039 destElem += rBit;
3040 } else {
3041 if (shiftAmt >= sizeof(Element) * 8) {
3042 if (srcElem1 != 0) {
3043 destElem = mask(sizeof(Element) * 8);
3044 fpscr.qc = 1;
3045 } else {
3046 destElem = 0;
3047 }
3048 } else {
3049 if (bits(srcElem1, sizeof(Element) * 8 - 1,
3050 sizeof(Element) * 8 - shiftAmt)) {
3051 destElem = mask(sizeof(Element) * 8);
3052 fpscr.qc = 1;
3053 } else {
3054 destElem = srcElem1 << shiftAmt;
3055 }
3056 }
3057 }
3058 FpscrQc = fpscr;
3059 '''
3060 threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes,
3061 2, uqrshlCode)
3062 threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4,
3063 uqrshlCode)
3064 threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4,
3065 uqrshlCode, scalar=True)
3066 # UQRSHRN
3067 uqrshrnCode = '''
3068 FPSCR fpscr = (FPSCR) FpscrQc;
3069 if (imm > sizeof(srcElem1) * 8) {
3070 if (srcElem1 != 0)
3071 fpscr.qc = 1;
3072 destElem = 0;
3073 } else if (imm) {
3074 BigElement mid = (srcElem1 >> (imm - 1));
3075 uint64_t rBit = mid & 0x1;
3076 mid >>= 1;
3077 mid += rBit;
3078 if (mid != (Element)mid) {
3079 destElem = mask(sizeof(Element) * 8);
3080 fpscr.qc = 1;
3081 } else {
3082 destElem = mid;
3083 }
3084 } else {
3085 if (srcElem1 != (Element)srcElem1) {
3086 destElem = mask(sizeof(Element) * 8 - 1);
3087 fpscr.qc = 1;
3088 } else {
3089 destElem = srcElem1;
3090 }
3091 }
3092 FpscrQc = fpscr;
3093 '''
3094 twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes,
3095 uqrshrnCode, hasImm=True)
3096 twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp",
3097 smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True)
3098 twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp",
3099 smallUnsignedTypes, uqrshrnCode, hasImm=True,
3100 scalar=True)
3101 # UQSHL (immediate): imm >= width saturates any non-zero srcElem1 to mask(width) with fpscr.qc = 1; for 0 < imm < width the shifted value is replaced by mask(width) (qc set) when any of the top imm bits of srcElem1 are set; imm == 0 copies srcElem1.
3102 uqshlImmCode = '''
3103 FPSCR fpscr = (FPSCR) FpscrQc;
3104 if (imm >= sizeof(Element) * 8) {
3105 if (srcElem1 != 0) {
3106 destElem = mask(sizeof(Element) * 8);
3107 fpscr.qc = 1;
3108 } else {
3109 destElem = 0;
3110 }
3111 } else if (imm) {
3112 destElem = (srcElem1 << imm);
3113 uint64_t topBits = bits((uint64_t)srcElem1,
3114 sizeof(Element) * 8 - 1,
3115 sizeof(Element) * 8 - imm);
3116 if (topBits != 0) {
3117 destElem = mask(sizeof(Element) * 8);
3118 fpscr.qc = 1;
3119 }
3120 } else {
3121 destElem = srcElem1;
3122 }
3123 FpscrQc = fpscr;
3124 '''
3125 twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2,
3126 uqshlImmCode, hasImm=True)
3127 twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4,
3128 uqshlImmCode, hasImm=True)
3129 twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4,
3130 uqshlImmCode, hasImm=True, scalar=True)
3131 # UQSHL (register): shift amount is the low byte of srcElem2 read as int8_t; negative amounts shift right (0 once the amount >= width), positive amounts shift left saturating to mask(width) with fpscr.qc = 1 when set bits would be lost, zero copies srcElem1.
3132 uqshlCode = '''
3133 int16_t shiftAmt = (int8_t)srcElem2;
3134 FPSCR fpscr = (FPSCR) FpscrQc;
3135 if (shiftAmt < 0) {
3136 shiftAmt = -shiftAmt;
3137 if (shiftAmt >= sizeof(Element) * 8) {
3138 shiftAmt = sizeof(Element) * 8 - 1;
3139 destElem = 0;
3140 } else {
3141 destElem = (srcElem1 >> shiftAmt);
3142 }
3143 } else if (shiftAmt > 0) {
3144 if (shiftAmt >= sizeof(Element) * 8) {
3145 if (srcElem1 != 0) {
3146 destElem = mask(sizeof(Element) * 8);
3147 fpscr.qc = 1;
3148 } else {
3149 destElem = 0;
3150 }
3151 } else {
3152 if (bits(srcElem1, sizeof(Element) * 8 - 1,
3153 sizeof(Element) * 8 - shiftAmt)) {
3154 destElem = mask(sizeof(Element) * 8);
3155 fpscr.qc = 1;
3156 } else {
3157 destElem = srcElem1 << shiftAmt;
3158 }
3159 }
3160 } else {
3161 destElem = srcElem1;
3162 }
3163 FpscrQc = fpscr;
3164 '''
3165 threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2,
3166 uqshlCode)
3167 threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4,
3168 uqshlCode)
3169 threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4,
3170 uqshlCode, scalar=True)
3171 # UQSHRN, UQSHRN2: for 0 < imm <= width of srcElem1, shift right by imm (in two steps) and saturate to mask(sizeof(Element) * 8) with fpscr.qc = 1 if the result does not fit Element; larger imm gives 0 (qc set when srcElem1 != 0); imm == 0 copies srcElem1.
3172 uqshrnCode = '''
3173 FPSCR fpscr = (FPSCR) FpscrQc;
3174 if (imm > sizeof(srcElem1) * 8) {
3175 if (srcElem1 != 0)
3176 fpscr.qc = 1;
3177 destElem = 0;
3178 } else if (imm) {
3179 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3180 if (mid != (Element)mid) {
3181 destElem = mask(sizeof(Element) * 8);
3182 fpscr.qc = 1;
3183 } else {
3184 destElem = mid;
3185 }
3186 } else {
3187 destElem = srcElem1;
3188 }
3189 FpscrQc = fpscr;
3190 '''
3191 twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes,
3192 uqshrnCode, hasImm=True)
3193 twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes,
3194 uqshrnCode, hasImm=True, hi=True)
3195 twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes,
3196 uqshrnCode, hasImm=True, scalar=True)
3197 # UQSUB: wrap-around subtract; if the difference is greater than srcElem1, clamp destElem to 0 and set fpscr.qc = 1.
3198 uqsubCode = '''
3199 destElem = srcElem1 - srcElem2;
3200 FPSCR fpscr = (FPSCR) FpscrQc;
3201 if (destElem > srcElem1) {
3202 destElem = 0;
3203 fpscr.qc = 1;
3204 }
3205 FpscrQc = fpscr;
3206 '''
3207 threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2,
3208 uqsubCode)
3209 threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4,
3210 uqsubCode)
3211 threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4,
3212 uqsubCode, scalar=True)
3213 # UQXTN: copy srcElem1 into the narrower destElem; if casting back to BigElement does not give srcElem1, set fpscr.qc and clamp to mask(sizeof(Element) * 8).
3214 uqxtnCode = '''
3215 FPSCR fpscr = (FPSCR) FpscrQc;
3216 destElem = srcElem1;
3217 if ((BigElement)destElem != srcElem1) {
3218 fpscr.qc = 1;
3219 destElem = mask(sizeof(Element) * 8);
3220 }
3221 FpscrQc = fpscr;
3222 '''
3223 twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes,
3224 uqxtnCode)
3225 twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes,
3226 uqxtnCode, hi=True)
3227 twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes,
3228 uqxtnCode, scalar=True)
3229 # URECPE: destElem is the result of unsignedRecipEstimate(srcElem1); registered for uint32_t only.
3230 urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);"
3231 twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2,
3232 urecpeCode)
3233 twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4,
3234 urecpeCode)
3235 # URHADD: unsigned registration of rhaddCode.
3236 threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes,
3237 2, rhaddCode)
3238 threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes,
3239 4, rhaddCode)
3240 # URSHL: unsigned registration of rshlCode.
3241 threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2,
3242 rshlCode)
3243 threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4,
3244 rshlCode)
3245 # URSHR: unsigned registration of rshrCode.
3246 twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2,
3247 rshrCode, hasImm=True)
3248 twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4,
3249 rshrCode, hasImm=True)
3250 # URSQRTE: destElem is the result of unsignedRSqrtEstimate(srcElem1); registered for uint32_t only.
3251 ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);"
3252 twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2,
3253 ursqrteCode)
3254 twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4,
3255 ursqrteCode)
3256 # URSRA: unsigned registration of rsraCode.
3257 twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2,
3258 rsraCode, True, hasImm=True)
3259 twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4,
3260 rsraCode, True, hasImm=True)
3261 # USHL: unsigned registration of shlCode.
3262 threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2,
3263 shlCode)
3264 threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4,
3265 shlCode)
3266 # USHLL, USHLL2: unsigned registration of shllCode.
3267 twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes,
3268 shllCode, hasImm=True)
3269 twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes,
3270 shllCode, hi=True, hasImm=True)
3271 # USHR: unsigned registration of shrCode.
3272 twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2,
3273 shrCode, hasImm=True)
3274 twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4,
3275 shrCode, hasImm=True)
3276 # USQADD: tmp = destElem + srcElem1; with the top bit of srcElem1 clear, a wrapped sum saturates to (Element)(-1); with it set, srcElem1 is negated and a magnitude greater than destElem clamps to 0; both clamps set fpscr.qc, otherwise tmp is stored.
3277 usqaddCode = '''
3278 FPSCR fpscr = (FPSCR) FpscrQc;
3279 Element tmp = destElem + srcElem1;
3280 if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) {
3281 if (tmp < srcElem1 || tmp < destElem) {
3282 destElem = (Element)(-1);
3283 fpscr.qc = 1;
3284 } else {
3285 destElem = tmp;
3286 }
3287 } else {
3288 Element absSrcElem1 = (~srcElem1) + 1;
3289 if (absSrcElem1 > destElem) {
3290 destElem = 0;
3291 fpscr.qc = 1;
3292 } else {
3293 destElem = tmp;
3294 }
3295 }
3296 FpscrQc = fpscr;
3297 '''
3298 twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2,
3299 usqaddCode, True)
3300 twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4,
3301 usqaddCode, True)
3302 twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4,
3303 usqaddCode, True, scalar=True)
3304 # USRA: unsigned registration of sraCode.
3305 twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2,
3306 sraCode, True, hasImm=True)
3307 twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4,
3308 sraCode, True, hasImm=True)
3309 # USUBL: unsigned registration of sublwCode through threeRegLongInstX.
3310 threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes,
3311 sublwCode)
3312 threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes,
3313 sublwCode, hi=True)
3314 # USUBW: the same sublwCode, registered through threeRegWideInstX.
3315 threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes,
3316 sublwCode)
3317 threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes,
3318 sublwCode, hi=True)
3319 # UXTL -> alias to USHLL
3320 # UZP1
3321 uzpCode = '''
3322 unsigned part = %s;
3323 for (unsigned i = 0; i < eCount / 2; i++) {
3324 destReg.elements[i] = srcReg1.elements[2 * i + part];
3325 destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part];
3326 }
3327 '''
3328 threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2,
3329 uzpCode % "0")
3330 threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4,
3331 uzpCode % "0")
3332 # UZP2
3333 threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2,
3334 uzpCode % "1")
3335 threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4,
3336 uzpCode % "1")
3337 # XTN, XTN2
3338 xtnCode = "destElem = srcElem1;"
3339 twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode)
3340 twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes,
3341 xtnCode, hi=True)
3342 # ZIP1: for i < eCount / 2, dest[2i] = srcReg1[base + i] and dest[2i + 1] = srcReg2[base + i]; the %s slot supplies base, "0" here.
3343 zipCode = '''
3344 unsigned base = %s;
3345 for (unsigned i = 0; i < eCount / 2; i++) {
3346 destReg.elements[2 * i] = srcReg1.elements[base + i];
3347 destReg.elements[2 * i + 1] = srcReg2.elements[base + i];
3348 }
3349 '''
3350 threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2,
3351 zipCode % "0")
3352 threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4,
3353 zipCode % "0")
3354 # ZIP2: same template with base = "eCount / 2".
3355 threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2,
3356 zipCode % "eCount / 2")
3357 threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4,
3358 zipCode % "eCount / 2")
3359
3360 for decoderFlavour, type_dict in decoders.iteritems():
3361 header_output += '''
3362 class %(decoder_flavour)sDecoder {
3363 public:
3364 ''' % { "decoder_flavour" : decoderFlavour }
3365 for type,name in type_dict.iteritems():
3366 header_output += '''
3367 template<typename Elem> using %(type)s = %(new_name)s<Elem>;''' % {
3368 "type" : type, "new_name" : name
3369 }
3370 header_output += '''
3371 };'''
3372}};