neon.isa (7783:9b880b40ac10) neon.isa (7853:69aae4379062)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
623
# Accumulators that the generator functions below append their output to.
header_output = ""
exec_output = ""

# Element type names, grouped by signedness; the "small" groups leave out
# the 64-bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)

# Combined groups: unsigned names first, then signed.
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
# Generate a three-operand instruction whose two sources and destination
# all span the same number of registers (rCount).
#
# name, Name - passed to InstObjParams as the first two arguments.
# opClass    - stored under "op_class" in the InstObjParams dictionary.
# types      - element type names; one NeonExecDeclare substitution is
#              emitted per entry.
# rCount     - number of registers loaded per operand and written back.
# op         - C++ snippet spliced into the element loop; it sees srcElem1,
#              srcElem2 and destElem.
# readDest   - when true, the destination is also loaded and destElem is
#              initialized from it before op runs.
# pairwise   - when true, the operands of element i are the adjacent pair
#              (2*i, 2*i + 1) taken from srcReg1 followed by srcReg2.
#
# Appends to the global header_output and exec_output strings.
634 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635 readDest=False, pairwise=False):
636 global header_output, exec_output
# The generated code starts with simdEnabledCheckCode (defined elsewhere)
# followed by the working register vectors.
637 eWalkCode = simdEnabledCheckCode + '''
638 RegVect srcReg1, srcReg2, destReg;
639 '''
# Load each operand register (and the destination, if it is an input).
640 for reg in range(rCount):
641 eWalkCode += '''
642 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644 ''' % { "reg" : reg }
645 if readDest:
646 eWalkCode += '''
647 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648 ''' % { "reg" : reg }
# Optional statement that seeds destElem from the old destination value.
649 readDestCode = ''
650 if readDest:
651 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
# Element loop: pairwise form first, element-by-element form otherwise.
652 if pairwise:
653 eWalkCode += '''
654 for (unsigned i = 0; i < eCount; i++) {
655 Element srcElem1 = gtoh(2 * i < eCount ?
656 srcReg1.elements[2 * i] :
657 srcReg2.elements[2 * i - eCount]);
658 Element srcElem2 = gtoh(2 * i < eCount ?
659 srcReg1.elements[2 * i + 1] :
660 srcReg2.elements[2 * i + 1 - eCount]);
661 Element destElem;
662 %(readDest)s
663 %(op)s
664 destReg.elements[i] = htog(destElem);
665 }
666 ''' % { "op" : op, "readDest" : readDestCode }
667 else:
668 eWalkCode += '''
669 for (unsigned i = 0; i < eCount; i++) {
670 Element srcElem1 = gtoh(srcReg1.elements[i]);
671 Element srcElem2 = gtoh(srcReg2.elements[i]);
672 Element destElem;
673 %(readDest)s
674 %(op)s
675 destReg.elements[i] = htog(destElem);
676 }
677 ''' % { "op" : op, "readDest" : readDestCode }
# Write the result registers back to the destination operand.
678 for reg in range(rCount):
679 eWalkCode += '''
680 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681 ''' % { "reg" : reg }
682 iop = InstObjParams(name, Name,
683 "RegRegRegOp",
684 { "code": eWalkCode,
685 "r_count": rCount,
686 "predicate_test": predicateTest,
687 "op_class": opClass }, [])
# Emit the class declaration and execute template once, then one
# NeonExecDeclare substitution per requested element type.
688 header_output += NeonRegRegRegOpDeclare.subst(iop)
689 exec_output += NeonEqualRegExecute.subst(iop)
690 for type in types:
691 substDict = { "targs" : type,
692 "class_name" : Name }
693 exec_output += NeonExecDeclare.subst(substDict)
694
# Generate a three-operand floating point instruction whose operands all
# span rCount registers; the generated loop processes one register per
# iteration.
#
# name, Name - passed to InstObjParams as the first two arguments.
# opClass    - stored under "op_class" in the InstObjParams dictionary.
# types      - type names; one NeonExecDeclare substitution is emitted per
#              entry.
# rCount     - number of registers per operand.
# op         - C++ snippet spliced into the loop; it sees srcReg1, srcReg2
#              and destReg.
# readDest   - when true, the destination is loaded before the loop and
#              destReg is initialized from it before op runs.
# pairwise   - when true, the operands of register r are the adjacent pair
#              (2*r, 2*r + 1) taken from srcRegs1 followed by srcRegs2.
# toInt      - when true, the destination is held in a RegVect and destReg
#              is a FloatRegBits instead of a FloatReg.
#
# Appends to the global header_output and exec_output strings.
695 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696 readDest=False, pairwise=False, toInt=False):
697 global header_output, exec_output
698 eWalkCode = simdEnabledCheckCode + '''
699 typedef FloatReg FloatVect[rCount];
700 FloatVect srcRegs1, srcRegs2;
701 '''
# The destination container depends on whether the result is integer bits.
702 if toInt:
703 eWalkCode += 'RegVect destRegs;\n'
704 else:
705 eWalkCode += 'FloatVect destRegs;\n'
# Load the source registers (and the destination, if it is an input).
706 for reg in range(rCount):
707 eWalkCode += '''
708 srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709 srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710 ''' % { "reg" : reg }
711 if readDest:
712 if toInt:
713 eWalkCode += '''
714 destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715 ''' % { "reg" : reg }
716 else:
717 eWalkCode += '''
718 destRegs[%(reg)d] = FpDestP%(reg)d;
719 ''' % { "reg" : reg }
# NOTE(review): with readDest and toInt both set, the statement below reads
# destRegs[r] although destRegs is declared as a RegVect and every other
# toInt access goes through destRegs.regs[...] - confirm this combination
# is either unused or intended.
720 readDestCode = ''
721 if readDest:
722 readDestCode = 'destReg = destRegs[r];'
723 destType = 'FloatReg'
724 writeDest = 'destRegs[r] = destReg;'
725 if toInt:
726 destType = 'FloatRegBits'
727 writeDest = 'destRegs.regs[r] = destReg;'
# Register loop: pairwise form first, register-by-register form otherwise.
728 if pairwise:
729 eWalkCode += '''
730 for (unsigned r = 0; r < rCount; r++) {
731 FloatReg srcReg1 = (2 * r < rCount) ?
732 srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733 FloatReg srcReg2 = (2 * r < rCount) ?
734 srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735 %(destType)s destReg;
736 %(readDest)s
737 %(op)s
738 %(writeDest)s
739 }
740 ''' % { "op" : op,
741 "readDest" : readDestCode,
742 "destType" : destType,
743 "writeDest" : writeDest }
744 else:
745 eWalkCode += '''
746 for (unsigned r = 0; r < rCount; r++) {
747 FloatReg srcReg1 = srcRegs1[r];
748 FloatReg srcReg2 = srcRegs2[r];
749 %(destType)s destReg;
750 %(readDest)s
751 %(op)s
752 %(writeDest)s
753 }
754 ''' % { "op" : op,
755 "readDest" : readDestCode,
756 "destType" : destType,
757 "writeDest" : writeDest }
# Write the results back, through .uw for the integer form.
758 for reg in range(rCount):
759 if toInt:
760 eWalkCode += '''
761 FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
762 ''' % { "reg" : reg }
763 else:
764 eWalkCode += '''
765 FpDestP%(reg)d = destRegs[%(reg)d];
766 ''' % { "reg" : reg }
767 iop = InstObjParams(name, Name,
768 "FpRegRegRegOp",
769 { "code": eWalkCode,
770 "r_count": rCount,
771 "predicate_test": predicateTest,
772 "op_class": opClass }, [])
# Emit the class declaration and execute template once, then one
# NeonExecDeclare substitution per requested type.
773 header_output += NeonRegRegRegOpDeclare.subst(iop)
774 exec_output += NeonEqualRegExecute.subst(iop)
775 for type in types:
776 substDict = { "targs" : type,
777 "class_name" : Name }
778 exec_output += NeonExecDeclare.subst(substDict)
779
# Generate a three-operand instruction whose operands may span different
# numbers of registers.
#
# name, Name - passed to InstObjParams as the first two arguments.
# opClass    - stored under "op_class" in the InstObjParams dictionary.
# types      - element type names; one NeonExecDeclare substitution is
#              emitted per entry.
# op         - C++ snippet spliced into the element loop; it sees srcElem1,
#              srcElem2 and destElem.
# bigSrc1, bigSrc2, bigDest
#            - each true flag makes that operand span 4 registers instead
#              of 2 and use the "Big" type prefix.
# readDest   - when true, the destination is also loaded and destElem is
#              initialized from it before op runs.
#
# Appends to the global header_output and exec_output strings.
780 def threeUnequalRegInst(name, Name, opClass, types, op,
781 bigSrc1, bigSrc2, bigDest, readDest):
782 global header_output, exec_output
# Defaults describe the small form; each "big" flag overrides one operand.
783 src1Cnt = src2Cnt = destCnt = 2
784 src1Prefix = src2Prefix = destPrefix = ''
785 if bigSrc1:
786 src1Cnt = 4
787 src1Prefix = 'Big'
788 if bigSrc2:
789 src2Cnt = 4
790 src2Prefix = 'Big'
791 if bigDest:
792 destCnt = 4
793 destPrefix = 'Big'
794 eWalkCode = simdEnabledCheckCode + '''
795 %sRegVect srcReg1;
796 %sRegVect srcReg2;
797 %sRegVect destReg;
798 ''' % (src1Prefix, src2Prefix, destPrefix)
# Load each operand using its own register count.
799 for reg in range(src1Cnt):
800 eWalkCode += '''
801 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
802 ''' % { "reg" : reg }
803 for reg in range(src2Cnt):
804 eWalkCode += '''
805 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
806 ''' % { "reg" : reg }
807 if readDest:
808 for reg in range(destCnt):
809 eWalkCode += '''
810 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
811 ''' % { "reg" : reg }
812 readDestCode = ''
813 if readDest:
814 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
# NOTE(review): srcElem2 below is declared with src1Prefix, and src2Prefix
# is passed to the format dictionary but never referenced by the template.
# When bigSrc1 and bigSrc2 differ, srcElem2 therefore gets srcElem1's
# element type - confirm whether %(src2Prefix)s was intended.
815 eWalkCode += '''
816 for (unsigned i = 0; i < eCount; i++) {
817 %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818 %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819 %(destPrefix)sElement destElem;
820 %(readDest)s
821 %(op)s
822 destReg.elements[i] = htog(destElem);
823 }
824 ''' % { "op" : op, "readDest" : readDestCode,
825 "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826 "destPrefix" : destPrefix }
# Write the result registers back to the destination operand.
827 for reg in range(destCnt):
828 eWalkCode += '''
829 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
830 ''' % { "reg" : reg }
# r_count is fixed at 2 here regardless of the per-operand register counts.
831 iop = InstObjParams(name, Name,
832 "RegRegRegOp",
833 { "code": eWalkCode,
834 "r_count": 2,
835 "predicate_test": predicateTest,
836 "op_class": opClass }, [])
837 header_output += NeonRegRegRegOpDeclare.subst(iop)
838 exec_output += NeonUnequalRegExecute.subst(iop)
839 for type in types:
840 substDict = { "targs" : type,
841 "class_name" : Name }
842 exec_output += NeonExecDeclare.subst(substDict)
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    # Narrowing form: both sources are "big", the destination is not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Long form: only the destination is "big"; both sources are not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    # Wide form: the first source and the destination are "big"; the second
    # source is not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
856 def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857 global header_output, exec_output
858 eWalkCode = simdEnabledCheckCode + '''
859 RegVect srcReg1, srcReg2, destReg;
860 '''
861 for reg in range(rCount):
862 eWalkCode += '''
863 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
864 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
865 ''' % { "reg" : reg }
866 if readDest:
867 eWalkCode += '''
868 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
869 ''' % { "reg" : reg }
870 readDestCode = ''
871 if readDest:
872 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873 eWalkCode += '''
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
623
# Accumulators that the generator functions below append to.
header_output = ""
exec_output = ""

# Element type names grouped by signedness. The "small" tuples stop at
# 32 bits; the full tuples append the 64-bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
# Generate an instruction whose two sources and destination each span
# rCount registers. The C++ snippet `op` is wrapped in a per-element
# loop; with readDest the destination registers are loaded first, and
# with pairwise the two operands are adjacent elements taken from
# srcReg1 followed by srcReg2. The declaration goes to header_output,
# the execute method and one NeonExecDeclare per entry of `types` go to
# exec_output.
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        walk += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    ''' % regSubst
        if readDest:
            walk += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % regSubst
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    loopSubst = { "op" : op, "readDest" : loadDest }
    if not pairwise:
        walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % loopSubst
    else:
        walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % loopSubst
    for idx in range(rCount):
        walk += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    iop = InstObjParams(name, Name, "RegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
694
# Generate a three-operand instruction that walks rCount FloatReg values
# per source. The C++ snippet `op` runs once per register index r.
# toInt switches the destination to a RegVect of FloatRegBits; readDest
# loads the destination before the walk; pairwise feeds `op` adjacent
# registers taken from srcRegs1 followed by srcRegs2. The declaration
# goes to header_output, the execute method and one NeonExecDeclare per
# entry of `types` go to exec_output.
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    walk += 'RegVect destRegs;\n' if toInt else 'FloatVect destRegs;\n'
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        walk += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % regSubst
        if not readDest:
            continue
        if toInt:
            walk += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
    ''' % regSubst
        else:
            walk += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % regSubst
    loadDest = 'destReg = destRegs[r];' if readDest else ''
    if toInt:
        destType = 'FloatRegBits'
        storeDest = 'destRegs.regs[r] = destReg;'
    else:
        destType = 'FloatReg'
        storeDest = 'destRegs[r] = destReg;'
    loopSubst = { "op" : op,
                  "readDest" : loadDest,
                  "destType" : destType,
                  "writeDest" : storeDest }
    if not pairwise:
        walk += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % loopSubst
    else:
        walk += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % loopSubst
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        if toInt:
            walk += '''
    FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
    ''' % regSubst
        else:
            walk += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % regSubst
    iop = InstObjParams(name, Name, "FpRegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
779
# Generate a three-operand instruction whose operands may differ in
# width. Each of bigSrc1 / bigSrc2 / bigDest selects the "Big" vector
# type and four registers for that operand instead of the plain type and
# two registers. The C++ snippet `op` is wrapped in a per-element loop;
# readDest loads the destination first. The declaration goes to
# header_output, the execute method and one NeonExecDeclare per entry of
# `types` go to exec_output.
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    global header_output, exec_output
    src1Cnt, src1Prefix = (4, 'Big') if bigSrc1 else (2, '')
    src2Cnt, src2Prefix = (4, 'Big') if bigSrc2 else (2, '')
    destCnt, destPrefix = (4, 'Big') if bigDest else (2, '')
    walk = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for idx in range(src1Cnt):
        walk += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''' % { "reg" : idx }
    for idx in range(src2Cnt):
        walk += '''
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    ''' % { "reg" : idx }
    loadDest = ''
    if readDest:
        for idx in range(destCnt):
            walk += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % { "reg" : idx }
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    # NOTE(review): srcElem2 is declared with src1Prefix, not src2Prefix,
    # so "src2Prefix" below is passed but never referenced by the text.
    # Confirm whether that is intentional before changing it.
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for idx in range(destCnt):
        walk += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    iop = InstObjParams(name, Name, "RegRegRegOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
843
# Both sources use the Big vector type; the destination uses the plain one.
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    bigSrc1, bigSrc2, bigDest = True, True, False
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
847
# Both sources use the plain vector type; the destination uses the Big one.
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    bigSrc1, bigSrc2, bigDest = False, False, True
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
851
# The first source and the destination use the Big vector type; the
# second source uses the plain one.
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    bigSrc1, bigSrc2, bigDest = True, False, True
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
855
# Generate an instruction that combines every element of the first
# source with one element of the second source, selected by imm. Sources
# and destination each span rCount registers. If imm does not index a
# valid element the generated code raises UndefinedInstruction instead
# of reading outside srcReg2. The declaration goes to header_output, the
# execute method and one NeonExecDeclare per entry of `types` go to
# exec_output.
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range test must use ||: with && the condition can never hold
    # and an out-of-range imm would index past srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)

# Same scheme as twoEqualRegInst, but the sources span two registers
# while the destination is a BigRegVect spanning four, and each result
# is a BigElement. An imm that does not index a valid element raises
# UndefinedInstruction in the generated code.
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range test must use ||: with && the condition can never hold
    # and an out-of-range imm would index past srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)

# FloatReg variant of twoEqualRegInst: every register of the first
# source is combined with register imm of the second source. imm is
# checked against rCount, the length of the srcRegs2 array it indexes;
# an out-of-range imm raises UndefinedInstruction in the generated code.
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range test must use || (with && it can never hold) and must be
    # bounded by rCount, because srcRegs2 has exactly rCount entries.
    eWalkCode += '''
    if (imm < 0 || imm >= rCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
994
# Generate a one-source instruction with an immediate, where source and
# destination each span rCount registers. The C++ snippet `op` runs once
# per loop index i. fromInt makes `op` see the source as FloatRegBits
# srcReg1 rather than Element srcElem1; toInt makes it produce
# FloatRegBits destReg rather than Element destElem. The declaration
# goes to header_output, the execute method and one NeonExecDeclare per
# entry of `types` go to exec_output.
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        walk += '''
    srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''' % regSubst
        if readDest:
            walk += '''
    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % regSubst
    # NOTE(review): the toInt override of the destination load is applied
    # whether or not readDest is set - confirm that is the intended
    # nesting.
    if toInt:
        loadDest = 'destReg = gtoh(destRegs.regs[i]);'
        declDest = 'FloatRegBits destReg;'
        storeDest = 'destRegs.regs[i] = htog(destReg);'
    else:
        loadDest = ''
        if readDest:
            loadDest = 'destElem = gtoh(destRegs.elements[i]);'
        declDest = 'Element destElem;'
        storeDest = 'destRegs.elements[i] = htog(destElem);'
    if fromInt:
        loadSrc = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        loadSrc = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : loadSrc,
            "declDest" : declDest,
            "readDest" : loadDest,
            "op" : op,
            "writeDest" : storeDest }
    for idx in range(rCount):
        walk += '''
    FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
    ''' % { "reg" : idx }
    iop = InstObjParams(name, Name, "RegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1051
# Generate a one-source instruction with an immediate whose source is a
# BigRegVect spanning four registers and whose destination is a RegVect
# spanning two. The C++ snippet `op` turns BigElement srcElem1 into
# Element destElem; readDest loads the destination first. The
# declaration goes to header_output, the execute method and one
# NeonExecDeclare per entry of `types` go to exec_output.
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    srcRegCount = 4
    destRegCount = 2
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcRegCount):
        walk += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''' % { "reg" : idx }
    loadDest = ''
    if readDest:
        for idx in range(destRegCount):
            walk += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % { "reg" : idx }
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in range(destRegCount):
        walk += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    iop = InstObjParams(name, Name, "RegRegImmOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1095
# Generate a one-source instruction with an immediate whose source is a
# RegVect spanning two registers and whose destination is a BigRegVect
# spanning four. The C++ snippet `op` turns Element srcElem1 into
# BigElement destElem. With readDest the destination registers are
# loaded and destElem is pre-set from the current destination element.
# The declaration goes to header_output, the execute method and one
# NeonExecDeclare per entry of `types` go to exec_output.
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Load into destElem, the per-element local declared in the loop;
        # destReg is the whole vector and cannot take an element value.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1139
# Generate a one-source instruction where source and destination each
# span rCount registers. The C++ snippet `op` runs once per element; the
# result is stored at index j, which starts equal to i and is visible to
# `op`. readDest loads the destination first. The declaration goes to
# header_output, the execute method and one NeonExecDeclare per entry of
# `types` go to exec_output.
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        walk += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''' % regSubst
        if readDest:
            walk += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    ''' % regSubst
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in range(rCount):
        walk += '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    iop = InstObjParams(name, Name, "RegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
1182
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a two-register NEON instruction with an immediate (base class
    # "RegRegImmOp") in which every loop iteration reads the same source
    # element, srcReg1.elements[imm], and writes destination element i.
    #
    #   op       -- C++ fragment; reads srcElem1, assigns destElem.
    #   readDest -- when True, destReg is preloaded from FpDestP* and
    #               destElem starts out as the current destination element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    pieces = range(rCount)
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in pieces:
        regSub = { "reg" : idx }
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regSub
        if readDest:
            walk += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regSub
    if readDest:
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDest = ''
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in pieces:
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    params = { "code": walk,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", params, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1224
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a two-register NEON instruction (base class "RegRegOp") that
    # may rewrite both of its operands: srcReg1 and destReg are always
    # loaded from FpOp1P*/FpDestP*, op is appended verbatim (there is no
    # per-element loop wrapper here), and then both vectors are written
    # back to FpDestP* and FpOp1P*.
    #
    #   op       -- complete C++ fragment operating on srcReg1 and destReg.
    #   readDest -- kept for signature compatibility only; the destination
    #               is always read, so the flag has no effect on the
    #               generated code.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1259
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    # Generate a two-register floating point NEON instruction (base class
    # "FpRegRegOp").  The generated code walks whole registers rather than
    # elements: for each r in [0, rCount) op sees srcReg1 (a FloatReg) and
    # assigns destReg.
    #
    #   op       -- C++ fragment; reads srcReg1, assigns destReg.
    #   readDest -- when True, the destination registers are preloaded and
    #               destReg starts out as the current destination register.
    #   toInt    -- when True, the destination is held in a RegVect and
    #               destReg is a FloatRegBits; otherwise the destination is
    #               a FloatVect and destReg is a FloatReg.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    # Pick the destination type and the matching read/write snippets.  The
    # read snippet must mirror the write snippet: the loop index is r, and
    # a RegVect destination is reached through its .regs member.
    if toInt:
        destType = 'FloatRegBits'
        destAccess = 'destRegs.regs[r]'
    else:
        destType = 'FloatReg'
        destAccess = 'destRegs[r]'
    writeDest = '%s = destReg;' % destAccess
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destAccess
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1325
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a two-register NEON instruction (base class "RegRegOp") that
    # folds adjacent source elements together: iteration i hands op the
    # source elements 2*i and 2*i+1 (srcElem1, srcElem2) and stores one
    # BigElement result at destination index i, for eCount / 2 iterations.
    #
    #   op       -- C++ fragment; reads srcElem1/srcElem2, assigns destElem.
    #   readDest -- when True, destReg is preloaded from FpDestP* and
    #               destElem starts out as the current destination element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    pieces = range(rCount)
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for idx in pieces:
        regSub = { "reg" : idx }
        walk += '''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regSub
        if readDest:
            walk += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regSub
    if readDest:
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDest = ''
    walk += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in pieces:
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    params = { "code": walk,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1369
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a two-register narrowing NEON instruction (base class
    # "RegRegOp"): the source is a BigRegVect loaded from FpOp1P0-3 and the
    # destination is a RegVect stored to FpDestP0-1.
    #
    #   op       -- C++ fragment; reads srcElem1 (a BigElement) and assigns
    #               destElem (an Element).
    #   readDest -- when True, destReg is preloaded from FpDestP0-1 and
    #               destElem starts out as the current destination element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    srcPieces = range(4)
    destPieces = range(2)
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in srcPieces:
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : idx }
    loadDest = ''
    if readDest:
        for idx in destPieces:
            walk += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : idx }
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in destPieces:
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    params = { "code": walk,
               "r_count": 2,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1413
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a one-register NEON instruction with an immediate (base class
    # "RegImmOp"): there is no source vector, op computes each destination
    # element on its own.
    #
    #   op       -- C++ fragment; assigns destElem.
    #   readDest -- when True, destReg is preloaded from FpDestP* and
    #               destElem starts out as the current destination element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    pieces = range(rCount)
    walk = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    loadDest = ''
    if readDest:
        for idx in pieces:
            walk += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : idx }
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for idx in pieces:
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    params = { "code": walk,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegImmOp", params, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1451
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a two-register lengthening NEON instruction (base class
    # "RegRegOp"): the source is a RegVect loaded from FpOp1P0-1 and the
    # destination is a BigRegVect stored to FpDestP0-3.
    #
    #   name, Name -- forwarded to InstObjParams as its first two arguments.
    #   opClass    -- value of the "op_class" field.
    #   types      -- one NeonExecDeclare instantiation is emitted per entry.
    #   op         -- C++ fragment; reads srcElem1 (an Element) and assigns
    #                 destElem (a BigElement).
    #   readDest   -- when True, destReg is preloaded from FpDestP0-3 and
    #                 destElem starts out as the current destination
    #                 element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element value (destElem), not the whole vector: the
        # loop below declares destElem and stores it back into destReg.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1495
# Halving add: each source has its low bit masked off and is halved on its
# own; carryBit adds back the carry produced by the two discarded low bits.
# Each snippet below is registered twice, as a "...D" class with 2 and a
# "...Q" class with 4 as the fifth argument (presumably the register count,
# like rCount in the generator helpers -- confirm against
# threeEqualRegInst).
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

# Rounding halving add: same as vhaddCode except that 1 is added to the sum
# of the low bits before it is shifted down into carryBit.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

# Halving subtract: the halved sources are subtracted and barrowBit (the
# borrow derived from the difference of the two low bits) is subtracted from
# the result.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1533
# Bitwise instructions.  All of them are instantiated for unsignedTypes
# only, in a "D" and a "Q" flavour.

# AND of the two sources.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

# Bit clear: first source AND the complement of the second.
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

# OR of the two sources.
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

# vmov reuses the OR snippet, registered under its own class names and with
# the "SimdMiscOp" op class.
threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

# OR of the first source with the complement of the second.
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

# Exclusive OR of the two sources.
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

# The three bit-select snippets below read destElem as an input, and their
# calls pass an extra True argument (presumably readDest -- confirm against
# threeEqualRegInst).

# Where srcElem2 has a 1 bit keep the destination bit, elsewhere take the
# bit from srcElem1.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# Where srcElem2 has a 1 bit take the bit from srcElem1, elsewhere keep the
# destination bit.
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# The old destination acts as the selector: 1 bits pick srcElem1, 0 bits
# pick srcElem2.
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1582
# Element-wise maximum of the two sources.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

# Element-wise minimum of the two sources.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)

# Plain addition; instantiated for unsignedTypes only.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

# vpadd reuses the addition snippet with pairwise=True.
threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
                  2, vaddCode, pairwise=True)
threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
                  4, vaddCode, pairwise=True)
# Addition carried out after casting both sources to BigElement; shared by
# the long (vaddl) and wide (vaddw) forms.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Add in BigElement precision, then shift right by the bit width of Element.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# As vaddhnCode, with 1 << (Element bit width - 1) added before the shift so
# the result is rounded.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

# Plain subtraction; instantiated for unsignedTypes only.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Subtraction carried out after casting both sources to BigElement; shared
# by the long (vsubl) and wide (vsubw) forms.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1632
# Saturating add, unsigned instantiation: if the sum is smaller than either
# source the result is forced to (Element)(-1) and fpscr.qc is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Subtract in BigElement precision, then shift right by the bit width of
# Element.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# As vsubhnCode, with 1 << (Element bit width - 1) added before the shift so
# the result is rounded.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)

# Saturating add, signed instantiation: overflow is detected when both
# sources share a sign that the sum does not have.  The result is then set
# to 1 << (bit width - 1), minus one when the wrapped sum was negative, and
# fpscr.qc is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

# Saturating subtract, unsigned instantiation: if the difference is larger
# than srcElem1 the result is forced to 0 and fpscr.qc is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

# Saturating subtract, signed instantiation: same scheme as vqaddSCode, but
# the second source is tested for being non-negative (posSrc2) because it is
# being subtracted.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1701
# Comparisons: each yields (Element)(-1) when the condition holds and 0
# otherwise.

# Greater than.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

# Greater than or equal.
vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

# Equality; instantiated for unsignedTypes only.
vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1719
# Shift by a per-element amount: the shift count is srcElem2 truncated to
# int8_t.  A negative count shifts right by its magnitude, otherwise the
# shift is to the left.  Counts at or beyond the element width give 0 before
# the sign-extension fix-up that follows a right shift.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)

# Rounding variant of vshlCode: for right shifts rBit (the last bit shifted
# out, or 1 for a negative source shifted by more than the element width) is
# added to the shifted value.  A zero count copies the source unchanged.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1779
# Saturating shift, unsigned instantiation.  Right shifts (negative count)
# never saturate.  A left shift saturates to mask(element width) and sets
# fpscr.qc when any of the bits that would be shifted out is set, or when
# the count is at least the element width and the source is non-zero.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)

# Saturating shift, signed instantiation.  Right shifts are sign extended
# by hand.  A left shift saturates when the top shiftAmt + 1 bits of the
# source are not all copies of the sign; the saturated value is
# mask(element width - 1), complemented for a negative source, and fpscr.qc
# is set.
# NOTE(review): these two classes use the "SimdCmpOp" op class while the
# unsigned ones above use "SimdAluOp" -- confirm that is intended.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1861
# Saturating rounding shift, unsigned instantiation.  Right shifts (negative
# count) add rBit, the last bit shifted out, to the shifted value and never
# saturate.  A left shift saturates to mask(element width) and sets fpscr.qc
# when any of the bits that would be shifted out is set, or when the count
# is at least the element width and the source is non-zero.  A zero count
# copies the source unchanged; it gets its own branch, as in vqshlUCode, so
# the overflow test is never evaluated with an empty (inverted) bit range.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1899
# Saturating rounding shift, signed instantiation.  Right shifts are sign
# extended by hand and then rounded by adding rBit (the last bit shifted
# out, or 1 for a negative source shifted by more than the element width).
# A left shift saturates when the top shiftAmt + 1 bits of the source are
# not all copies of the sign; the saturated value is
# mask(element width - 1), complemented for a negative source, and fpscr.qc
# is set.  A zero count copies the source unchanged.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1951
# Absolute difference and accumulate: the larger source minus the smaller
# one is added to destElem.  The calls pass an extra True argument
# (presumably readDest -- confirm against threeEqualRegInst).
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# Long form of vabaCode: the difference is formed after casting both sources
# to BigElement.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

# Absolute difference: the larger source minus the smaller one.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# Long form of vabdCode: the difference is formed after casting both sources
# to BigElement.
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

# Bit test: (Element)(-1) when the sources have any set bit in common,
# otherwise 0.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)

# Plain multiplication.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# Multiplication carried out after casting both sources to BigElement.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

# Multiply-accumulate: the product is added to the existing destElem.
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# Long multiply-accumulate: the product is formed in BigElement precision.
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2003
# Saturating doubling multiply-accumulate (long).  midElem is twice the
# product of the sources; for the listed maxNeg/halfNeg source combinations
# it is replaced by ~(maxNeg << element width) and fpscr.qc is set.  The
# accumulation then saturates to mask(BigElement width - 1), complemented
# when the old destination was negative, if destElem and midElem shared a
# sign that the sum does not have.
# NOTE(review): 2 * halfNeg * maxNeg looks representable in BigElement, so
# the two halfNeg cases may not need to saturate -- confirm against the
# architecture pseudocode before changing.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)

# Saturating doubling multiply-subtract (long).  Same structure as
# vqdmlalCode, but midElem is subtracted; posMid holds the sign of -midElem,
# i.e. of the value that is effectively being added.
# NOTE(review): the same halfNeg question as for vqdmlalCode applies here.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)

# Saturating doubling multiply (long): twice the product of the sources.
# When both sources equal 1 << (element width - 1) the result is replaced by
# ~(srcElem1 << element width) and fpscr.qc is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2066
2067 vmlsCode = '''
2068 destElem = destElem - srcElem1 * srcElem2;
2069 '''
2070 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# VMLSL: widening multiply-subtract.  Both sources are cast to BigElement
# before multiplying and the product is subtracted from the accumulator.
vmlslCode = '''
    destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes,
                 vmlslCode, True)
2076
# VMUL (carry-less variant): for every set bit of srcElem2, XOR a copy of
# srcElem1 shifted left by that bit position into the result.
vmulpCode = '''
    const unsigned elemBits = sizeof(Element) * 8;
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < elemBits; ++bitPos) {
        if (!bits(srcElem2, bitPos))
            continue;
        destElem ^= srcElem1 << bitPos;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes,
                  2, vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes,
                  4, vmulpCode)
# VMULL (carry-less, widening): same shift-and-XOR accumulation as the
# non-widening form, but srcElem1 is cast to BigElement before shifting so no
# bits are lost.
vmullpCode = '''
    const unsigned elemBits = sizeof(Element) * 8;
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < elemBits; ++bitPos) {
        if (!bits(srcElem2, bitPos))
            continue;
        destElem ^= (BigElement)srcElem1 << bitPos;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                 vmullpCode)
2094
# Pairwise max / min registrations.  These reuse vmaxCode / vminCode (defined
# earlier in the file) and pass pairwise=True to the generator.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes,
                  2, vmaxCode, pairwise=True)
threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes,
                  4, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes,
                  2, vminCode, pairwise=True)
threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes,
                  4, vminCode, pairwise=True)
2100
# VQDMULH: the doubled product 2 * srcElem1 * srcElem2 is formed through
# int64_t and shifted right by the element width, so only its upper part is
# kept.  When both sources equal the pattern with only the top Element bit
# set, the result is replaced by ~srcElem1 and fpscr.qc is set.  The FPSCR
# copy read from FpscrQc is written back at the end.
2101 vqdmulhCode = '''
2102 FPSCR fpscr = (FPSCR) FpscrQc;
2103 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104 (sizeof(Element) * 8);
2105 if (srcElem1 == srcElem2 &&
2106 srcElem1 == (Element)((Element)1 <<
2107 (sizeof(Element) * 8 - 1))) {
2108 destElem = ~srcElem1;
2109 fpscr.qc = 1;
2110 }
2111 FpscrQc = fpscr;
2112 '''
2113 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2115
# VQRDMULH: saturating rounding doubling multiply, returning the high half.
#
# The doubled product plus a rounding constant of 1 << (element-width - 1) is
# shifted right by the element width.  The result only falls outside the
# signed Element range when both sources are the most negative value; it is
# then clamped to the largest positive Element and fpscr.qc is set.
#
# The previous version also took the saturation path when one source was
# maxNeg and the other maxNeg / 2.  That result is 2^(n-2), which is
# representable, yet the old code replaced it with the most negative value
# and set QC.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    // maxNeg * maxNeg is the only input pair whose rounded, doubled high
    // half reaches 2^(n-1); the true result is positive, so clamp upward.
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
                  "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
                  "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2139
# Floating-point VMAX.  processNans() produces destReg and reports through
# `done` whether it already handled the operands.  If it did not, the result
# comes from binaryOp() with fpMaxS; if it did and flushToZero() on the two
# sources returns true, fpscr.idc is set.  The FPSCR copy is read from and
# written back to FpscrExc.
2140 vmaxfpCode = '''
2141 FPSCR fpscr = (FPSCR) FpscrExc;
2142 bool done;
2143 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2144 if (!done) {
2145 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146 true, true, VfpRoundNearest);
2147 } else if (flushToZero(srcReg1, srcReg2)) {
2148 fpscr.idc = 1;
2149 }
2150 FpscrExc = fpscr;
2151 '''
2152 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2154
# Floating-point VMIN.  Same shape as the VMAX snippet but binaryOp() is
# given fpMinS: processNans() runs first, binaryOp() only when `done` is
# false, and fpscr.idc is set when processNans() finished the job and
# flushToZero() on the sources returns true.
#
# The trailing registrations create the floating-point pairwise vpmax / vpmin
# instructions by reusing vmaxfpCode / vminfpCode with pairwise=True.
2155 vminfpCode = '''
2156 FPSCR fpscr = (FPSCR) FpscrExc;
2157 bool done;
2158 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159 if (!done) {
2160 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161 true, true, VfpRoundNearest);
2162 } else if (flushToZero(srcReg1, srcReg2)) {
2163 fpscr.idc = 1;
2164 }
2165 FpscrExc = fpscr;
2166 '''
2167 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2169
2170 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2171 2, vmaxfpCode, pairwise=True)
2172 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2173 4, vmaxfpCode, pairwise=True)
2174
2175 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2176 2, vminfpCode, pairwise=True)
2177 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2178 4, vminfpCode, pairwise=True)
2179
# Floating-point VADD: destReg is binaryOp() of the two sources with fpAddS,
# using VfpRoundNearest; the FPSCR copy is read from and written back to
# FpscrExc.  The same snippet is registered again with pairwise=True for
# vpadd.
2180 vaddfpCode = '''
2181 FPSCR fpscr = (FPSCR) FpscrExc;
2182 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183 true, true, VfpRoundNearest);
2184 FpscrExc = fpscr;
2185 '''
2186 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2188
2189 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2190 2, vaddfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2192 4, vaddfpCode, pairwise=True)
2193
# Floating-point VSUB: destReg is binaryOp() of srcReg1 and srcReg2 with
# fpSubS, using VfpRoundNearest; FPSCR state goes through FpscrExc.
2194 vsubfpCode = '''
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197 true, true, VfpRoundNearest);
2198 FpscrExc = fpscr;
2199 '''
2200 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2202
# Floating-point VMUL: destReg is binaryOp() of the two sources with fpMulS,
# using VfpRoundNearest; FPSCR state goes through FpscrExc.
2203 vmulfpCode = '''
2204 FPSCR fpscr = (FPSCR) FpscrExc;
2205 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206 true, true, VfpRoundNearest);
2207 FpscrExc = fpscr;
2208 '''
2209 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2211
# Floating-point VMLA as two separate binaryOp() steps: first the product of
# the sources (fpMulS) into `mid`, then mid + destReg (fpAddS).  Both steps
# share the same fpscr copy, which is written back to FpscrExc afterwards.
2212 vmlafpCode = '''
2213 FPSCR fpscr = (FPSCR) FpscrExc;
2214 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215 true, true, VfpRoundNearest);
2216 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217 true, true, VfpRoundNearest);
2218 FpscrExc = fpscr;
2219 '''
2220 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2222
# Floating-point VMLS as two separate binaryOp() steps: the product of the
# sources (fpMulS) into `mid`, then destReg - mid (fpSubS).  Note the operand
# order of the second call: the accumulator comes first.
2223 vmlsfpCode = '''
2224 FPSCR fpscr = (FPSCR) FpscrExc;
2225 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226 true, true, VfpRoundNearest);
2227 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228 true, true, VfpRoundNearest);
2229 FpscrExc = fpscr;
2230 '''
2231 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2233
# Floating-point VCGT (register form).  binaryOp() runs vcgtFunc on the two
# sources; destReg becomes -1 when the returned value equals 0 and 0
# otherwise, and fpscr.ioc is set when the returned value equals 2.0.
# NOTE(review): this implies vcgtFunc encodes "true" as 0 and uses 2.0 as an
# invalid-operation marker - confirm against its definition.
2234 vcgtfpCode = '''
2235 FPSCR fpscr = (FPSCR) FpscrExc;
2236 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237 true, true, VfpRoundNearest);
2238 destReg = (res == 0) ? -1 : 0;
2239 if (res == 2.0)
2240 fpscr.ioc = 1;
2241 FpscrExc = fpscr;
2242 '''
2243 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2244 2, vcgtfpCode, toInt = True)
2245 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2246 4, vcgtfpCode, toInt = True)
2247
# Floating-point VCGE (register form).  binaryOp() runs vcgeFunc on the two
# sources; destReg becomes -1 when the returned value equals 0 and 0
# otherwise, and fpscr.ioc is set when the returned value equals 2.0.
# NOTE(review): the 0 / 2.0 result encoding is defined by vcgeFunc, which is
# outside this view - confirm.
2248 vcgefpCode = '''
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2255 FpscrExc = fpscr;
2256 '''
2257 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgefpCode, toInt = True)
2259 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgefpCode, toInt = True)
2261
# Floating-point VACGT.  binaryOp() runs vacgtFunc on the two sources;
# destReg becomes -1 when the returned value equals 0 and 0 otherwise, and
# fpscr.ioc is set when the returned value equals 2.0.
# NOTE(review): presumably vacgtFunc compares magnitudes; its definition and
# result encoding are outside this view - confirm.
2262 vacgtfpCode = '''
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2269 FpscrExc = fpscr;
2270 '''
2271 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2272 2, vacgtfpCode, toInt = True)
2273 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2274 4, vacgtfpCode, toInt = True)
2275
# Floating-point VACGE.  binaryOp() runs vacgeFunc on the two sources;
# destReg becomes -1 when the returned value equals 0 and 0 otherwise, and
# fpscr.ioc is set when the returned value equals 2.0.
# NOTE(review): presumably vacgeFunc compares magnitudes; its definition and
# result encoding are outside this view - confirm.
2276 vacgefpCode = '''
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2283 FpscrExc = fpscr;
2284 '''
2285 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgefpCode, toInt = True)
2287 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgefpCode, toInt = True)
2289
# Floating-point VCEQ (register form).  binaryOp() runs vceqFunc on the two
# sources; destReg becomes -1 when the returned value equals 0 and 0
# otherwise, and fpscr.ioc is set when the returned value equals 2.0.
# NOTE(review): the 0 / 2.0 result encoding is defined by vceqFunc, which is
# outside this view - confirm.
2290 vceqfpCode = '''
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2297 FpscrExc = fpscr;
2298 '''
2299 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2300 2, vceqfpCode, toInt = True)
2301 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2302 4, vceqfpCode, toInt = True)
2303
# VRECPS: destReg is binaryOp() of the two sources with fpRecpsS, using
# VfpRoundNearest; FPSCR state goes through FpscrExc.  The step computation
# itself lives in fpRecpsS, which is outside this view.
2304 vrecpsCode = '''
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307 true, true, VfpRoundNearest);
2308 FpscrExc = fpscr;
2309 '''
2310 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2312
# VRSQRTS: destReg is binaryOp() of the two sources with fpRSqrtsS, using
# VfpRoundNearest; FPSCR state goes through FpscrExc.  The step computation
# itself lives in fpRSqrtsS, which is outside this view.
2313 vrsqrtsCode = '''
2314 FPSCR fpscr = (FPSCR) FpscrExc;
2315 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316 true, true, VfpRoundNearest);
2317 FpscrExc = fpscr;
2318 '''
2319 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2321
# Floating-point VABD: the difference srcReg1 - srcReg2 is computed through
# binaryOp() with fpSubS, then fabs() of that difference is stored in
# destReg.  FPSCR state goes through FpscrExc.
2322 vabdfpCode = '''
2323 FPSCR fpscr = (FPSCR) FpscrExc;
2324 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325 true, true, VfpRoundNearest);
2326 destReg = fabs(mid);
2327 FpscrExc = fpscr;
2328 '''
2329 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2331
# Registrations made through the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators.  They reuse the multiply, multiply-accumulate and
# saturating-multiply snippets defined earlier in the file.
# NOTE(review): the trailing "s" in the class names suggests these are the
# by-scalar encodings - confirm against the generator definitions.

# vmla / vmlal
twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes,
                2, vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes,
                4, vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",),
                  2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",),
                  4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
               vmlalCode, True)

# vmls / vmlsl
twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes,
                2, vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes,
                4, vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",),
                  2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",),
                  4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
               vmlslCode, True)

# vmul / vmull
twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes,
                2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes,
                4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",),
                  2, vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",),
                  4, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes,
               vmullCode)

# saturating doubling multiplies
twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes,
               vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes,
                2, vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes,
                4, vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD", "SimdMultOp", smallSignedTypes,
                2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", "SimdMultOp", smallSignedTypes,
                4, vqrdmulhCode)
2359
# VSHR (immediate): plain right shift while the shift amount is smaller than
# the element width; otherwise the result is -1 for sources that ltz()
# reports as negative and 0 for everything else.
vshrCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = ltz(srcElem1) ? -1 : 0;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes,
                2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes,
                4, vshrCode)
2372
# VSRA: shift the source right by the immediate and add it to the
# accumulator.  A shift of at least the element width contributes -1 or 0
# depending on ltz(srcElem1).  For smaller shifts, a source that ltz()
# reports as negative but whose shifted value is not gets its sign bits
# filled in by hand.
vsraCode = '''
    Element shifted;
    if (imm < sizeof(srcElem1) * 8) {
        shifted = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(shifted)) {
            shifted |= -(shifted & ((Element)1 <<
                                    (sizeof(Element) * 8 - 1 - imm)));
        }
    } else if (ltz(srcElem1)) {
        shifted = -1;
    } else {
        shifted = 0;
    }
    destElem += shifted;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes,
                2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes,
                4, vsraCode, True)
2388
# VRSHR: rounding right shift by immediate.  A zero shift copies the source,
# a shift larger than the element width yields 0, and anything in between
# adds bit (imm - 1) of the source to the shifted value as the rounding
# increment.  The shift is split into (imm - 1) and 1 so that a shift by the
# full element width is never done in a single step.
vrshrCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    } else {
        destElem = 0;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes,
                2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes,
                4, vrshrCode)
2401
# VRSRA: rounding right shift by immediate, added to the accumulator.  A zero
# shift adds the source unchanged, a shift larger than the element width adds
# nothing, and anything in between adds the shifted value plus bit (imm - 1)
# of the source as the rounding increment.
vrsraCode = '''
    if (imm == 0) {
        destElem += srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    } else {
        destElem += 0;
    }
'''
twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes,
                2, vrsraCode, True)
twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes,
                4, vrsraCode, True)
2414
# VSRI: shift right and insert.  The shifted source fills the low
# (width - imm) bits of the destination while the destination keeps its top
# imm bits.  A shift of at least the element width leaves the destination
# as it was.
vsriCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (destElem & ~mask(sizeof(Element) * 8 - imm)) |
                   (srcElem1 >> imm);
    } else {
        destElem = destElem;
    }
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes,
                2, vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes,
                4, vsriCode, True)
2424
# VSHL (immediate): plain left shift.  When the shift amount reaches the
# element width the shift is performed as (width - 1) followed by 1, so no
# single shift equals or exceeds the operand width.
vshlCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = srcElem1 << imm;
    } else {
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    }
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes,
                2, vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes,
                4, vshlCode)
2433
# VSLI: shift left and insert.  The shifted source is combined with the low
# imm bits of the existing destination.  A shift of at least the element
# width leaves the destination as it was.
vsliCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (destElem & mask(imm)) | (srcElem1 << imm);
    } else {
        destElem = destElem;
    }
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes,
                2, vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes,
                4, vsliCode, True)
2442
# VQSHL (immediate, registered for signedTypes): saturating left shift.
#   * imm >= element width: a zero source gives zero; any other source
#     saturates to the pattern with only the top bit set, inverted when the
#     source is positive, and sets fpscr.qc.
#   * 0 < imm < width: the result is srcElem1 << imm unless the top (imm + 1)
#     bits of the source are neither all zero nor all one, in which case the
#     same saturation is applied and fpscr.qc is set.
#   * imm == 0: the source is copied.
# The FPSCR copy is read from and written back to FpscrQc.
2443 vqshlCode = '''
2444 FPSCR fpscr = (FPSCR) FpscrQc;
2445 if (imm >= sizeof(Element) * 8) {
2446 if (srcElem1 != 0) {
2447 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2448 if (srcElem1 > 0)
2449 destElem = ~destElem;
2450 fpscr.qc = 1;
2451 } else {
2452 destElem = 0;
2453 }
2454 } else if (imm) {
2455 destElem = (srcElem1 << imm);
2456 uint64_t topBits = bits((uint64_t)srcElem1,
2457 sizeof(Element) * 8 - 1,
2458 sizeof(Element) * 8 - 1 - imm);
2459 if (topBits != 0 && topBits != mask(imm + 1)) {
2460 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2461 if (srcElem1 > 0)
2462 destElem = ~destElem;
2463 fpscr.qc = 1;
2464 }
2465 } else {
2466 destElem = srcElem1;
2467 }
2468 FpscrQc = fpscr;
2469 '''
2470 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2472
# Saturating left shift registered for unsignedTypes under the mnemonic
# "vqshlu".
#   * imm >= element width: a non-zero source saturates to all ones
#     (mask(width)) and sets fpscr.qc; zero stays zero.
#   * 0 < imm < width: the result is srcElem1 << imm unless any of the top
#     imm bits of the source is set, in which case it saturates to all ones
#     and sets fpscr.qc.
#   * imm == 0: the source is copied.
# The FPSCR copy is read from and written back to FpscrQc.
2473 vqshluCode = '''
2474 FPSCR fpscr = (FPSCR) FpscrQc;
2475 if (imm >= sizeof(Element) * 8) {
2476 if (srcElem1 != 0) {
2477 destElem = mask(sizeof(Element) * 8);
2478 fpscr.qc = 1;
2479 } else {
2480 destElem = 0;
2481 }
2482 } else if (imm) {
2483 destElem = (srcElem1 << imm);
2484 uint64_t topBits = bits((uint64_t)srcElem1,
2485 sizeof(Element) * 8 - 1,
2486 sizeof(Element) * 8 - imm);
2487 if (topBits != 0) {
2488 destElem = mask(sizeof(Element) * 8);
2489 fpscr.qc = 1;
2490 }
2491 } else {
2492 destElem = srcElem1;
2493 }
2494 FpscrQc = fpscr;
2495 '''
2496 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2498
# Saturating left shift registered for signedTypes under the mnemonic
# "vqshlus": a signed source is shifted into an unsigned-style result.
#   * A negative source always yields 0 and sets fpscr.qc, whatever imm is
#     (including imm == 0).
#   * imm >= element width: a positive source saturates to all ones
#     (mask(width)) and sets fpscr.qc; zero stays zero.
#   * 0 < imm < width: the result is srcElem1 << imm unless any of the top
#     imm bits of a non-negative source is set, in which case it saturates to
#     all ones and sets fpscr.qc.
#   * imm == 0 with a non-negative source: the source is copied.
# The FPSCR copy is read from and written back to FpscrQc.
2499 vqshlusCode = '''
2500 FPSCR fpscr = (FPSCR) FpscrQc;
2501 if (imm >= sizeof(Element) * 8) {
2502 if (srcElem1 < 0) {
2503 destElem = 0;
2504 fpscr.qc = 1;
2505 } else if (srcElem1 > 0) {
2506 destElem = mask(sizeof(Element) * 8);
2507 fpscr.qc = 1;
2508 } else {
2509 destElem = 0;
2510 }
2511 } else if (imm) {
2512 destElem = (srcElem1 << imm);
2513 uint64_t topBits = bits((uint64_t)srcElem1,
2514 sizeof(Element) * 8 - 1,
2515 sizeof(Element) * 8 - imm);
2516 if (srcElem1 < 0) {
2517 destElem = 0;
2518 fpscr.qc = 1;
2519 } else if (topBits != 0) {
2520 destElem = mask(sizeof(Element) * 8);
2521 fpscr.qc = 1;
2522 }
2523 } else {
2524 if (srcElem1 < 0) {
2525 fpscr.qc = 1;
2526 destElem = 0;
2527 } else {
2528 destElem = srcElem1;
2529 }
2530 }
2531 FpscrQc = fpscr;
2532 '''
2533 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2535
# VSHRN: right shift by immediate through the narrowing generator.  A shift
# of at least the source element width produces 0.
vshrnCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, vshrnCode)
2544
# VRSHRN: rounding right shift by immediate through the narrowing generator.
# A zero shift copies the source, a shift larger than the source element
# width yields 0, and anything in between adds bit (imm - 1) of the source
# to the shifted value as the rounding increment.
vrshrnCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, vrshrnCode)
2556
# VQSHRN (registered for smallSignedTypes): saturating right shift through
# the narrowing generator.
#   * imm > source width: the result is 0; fpscr.qc is set unless the source
#     is 0 or -1.
#   * 0 < imm <= source width: the source is shifted in two steps,
#     sign-extended by hand from bit (BigElement width - 1 - imm), and
#     compared with its own Element-cast value.  If they differ, the result
#     saturates to mask(Element width - 1), inverted for a negative source,
#     and fpscr.qc is set.
#   * imm == 0: the source is assigned without a saturation check.
# The FPSCR copy is read from and written back to FpscrQc.
2557 vqshrnCode = '''
2558 FPSCR fpscr = (FPSCR) FpscrQc;
2559 if (imm > sizeof(srcElem1) * 8) {
2560 if (srcElem1 != 0 && srcElem1 != -1)
2561 fpscr.qc = 1;
2562 destElem = 0;
2563 } else if (imm) {
2564 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2565 mid |= -(mid & ((BigElement)1 <<
2566 (sizeof(BigElement) * 8 - 1 - imm)));
2567 if (mid != (Element)mid) {
2568 destElem = mask(sizeof(Element) * 8 - 1);
2569 if (srcElem1 < 0)
2570 destElem = ~destElem;
2571 fpscr.qc = 1;
2572 } else {
2573 destElem = mid;
2574 }
2575 } else {
2576 destElem = srcElem1;
2577 }
2578 FpscrQc = fpscr;
2579 '''
2580 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2581
# Saturating narrowing right shift registered for smallUnsignedTypes under
# the mnemonic "vqshrun".
#   * imm > source width: the result is 0; fpscr.qc is set for a non-zero
#     source.
#   * 0 < imm <= source width: the two-step shifted value is compared with
#     its own Element-cast value.  If they differ, the result saturates to
#     all ones (mask(Element width)) and fpscr.qc is set.
#   * imm == 0: the source is assigned without a saturation check.
# The FPSCR copy is read from and written back to FpscrQc.
2582 vqshrunCode = '''
2583 FPSCR fpscr = (FPSCR) FpscrQc;
2584 if (imm > sizeof(srcElem1) * 8) {
2585 if (srcElem1 != 0)
2586 fpscr.qc = 1;
2587 destElem = 0;
2588 } else if (imm) {
2589 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2590 if (mid != (Element)mid) {
2591 destElem = mask(sizeof(Element) * 8);
2592 fpscr.qc = 1;
2593 } else {
2594 destElem = mid;
2595 }
2596 } else {
2597 destElem = srcElem1;
2598 }
2599 FpscrQc = fpscr;
2600 '''
2601 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2602 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2603
# Saturating narrowing right shift registered for smallSignedTypes under the
# mnemonic "vqshrun": a signed source is narrowed to an unsigned-style
# result.
#   * imm > source width: the result is 0; fpscr.qc is set for a non-zero
#     source.
#   * 0 < imm <= source width: if any bit of the shifted value above the
#     Element width is set, the result saturates to 0 for a negative source
#     or to all ones otherwise, and fpscr.qc is set.
#   * imm == 0: the source is assigned without a saturation check.
# The FPSCR copy is read from and written back to FpscrQc.
2604 vqshrunsCode = '''
2605 FPSCR fpscr = (FPSCR) FpscrQc;
2606 if (imm > sizeof(srcElem1) * 8) {
2607 if (srcElem1 != 0)
2608 fpscr.qc = 1;
2609 destElem = 0;
2610 } else if (imm) {
2611 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2612 if (bits(mid, sizeof(BigElement) * 8 - 1,
2613 sizeof(Element) * 8) != 0) {
2614 if (srcElem1 < 0) {
2615 destElem = 0;
2616 } else {
2617 destElem = mask(sizeof(Element) * 8);
2618 }
2619 fpscr.qc = 1;
2620 } else {
2621 destElem = mid;
2622 }
2623 } else {
2624 destElem = srcElem1;
2625 }
2626 FpscrQc = fpscr;
2627 '''
2628 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2629 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2630
# VQRSHRN (registered for smallSignedTypes): saturating, rounding right shift
# through the narrowing generator.
#   * imm > source width: the result is 0; fpscr.qc is set unless the source
#     is 0 or -1.
#   * 0 < imm <= source width: the source is shifted by (imm - 1), its low
#     bit is saved as the rounding bit, it is shifted once more,
#     sign-extended by hand, and the rounding bit is added.  If the value
#     differs from its own Element-cast value, the result saturates to
#     mask(Element width - 1), inverted for a negative source, and fpscr.qc
#     is set.
#   * imm == 0: the source goes through the same Element-cast comparison and
#     saturation.
# The FPSCR copy is read from and written back to FpscrQc.
2631 vqrshrnCode = '''
2632 FPSCR fpscr = (FPSCR) FpscrQc;
2633 if (imm > sizeof(srcElem1) * 8) {
2634 if (srcElem1 != 0 && srcElem1 != -1)
2635 fpscr.qc = 1;
2636 destElem = 0;
2637 } else if (imm) {
2638 BigElement mid = (srcElem1 >> (imm - 1));
2639 uint64_t rBit = mid & 0x1;
2640 mid >>= 1;
2641 mid |= -(mid & ((BigElement)1 <<
2642 (sizeof(BigElement) * 8 - 1 - imm)));
2643 mid += rBit;
2644 if (mid != (Element)mid) {
2645 destElem = mask(sizeof(Element) * 8 - 1);
2646 if (srcElem1 < 0)
2647 destElem = ~destElem;
2648 fpscr.qc = 1;
2649 } else {
2650 destElem = mid;
2651 }
2652 } else {
2653 if (srcElem1 != (Element)srcElem1) {
2654 destElem = mask(sizeof(Element) * 8 - 1);
2655 if (srcElem1 < 0)
2656 destElem = ~destElem;
2657 fpscr.qc = 1;
2658 } else {
2659 destElem = srcElem1;
2660 }
2661 }
2662 FpscrQc = fpscr;
2663 '''
2664 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2665 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2666
# Saturating, rounding, narrowing right shift registered for
# smallUnsignedTypes under the mnemonic "vqrshrun".
#   * imm > source width: the result is 0; fpscr.qc is set for a non-zero
#     source.
#   * 0 < imm <= source width: the source is shifted by (imm - 1), its low
#     bit is saved as the rounding bit, it is shifted once more and the
#     rounding bit is added.  If the value does not survive a round trip
#     through Element, the result saturates to all ones and fpscr.qc is set.
#   * imm == 0: the source is narrowed with the same saturation rule.
#
# The imm == 0 branch previously saturated to mask(width - 1), which is the
# signed maximum.  These are unsigned elements, so it now saturates to
# mask(width) like the shifted branch of this snippet.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation value: every bit of the Element set.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2696
# Saturating, rounding, narrowing right shift registered for smallSignedTypes
# under the mnemonic "vqrshrun": a signed source is narrowed to an
# unsigned-style result.
#   * imm > source width: the result is 0; fpscr.qc is set for a non-zero
#     source.
#   * 0 < imm <= source width: the source is shifted by (imm - 1), its low
#     bit is saved as the rounding bit, it is shifted once more,
#     sign-extended by hand, and the rounding bit is added.  If any bit above
#     the Element width is set, the result saturates to 0 for a negative
#     source or to all ones otherwise, and fpscr.qc is set.
#   * imm == 0: a negative source yields 0 with fpscr.qc set; otherwise the
#     source is assigned.
# The FPSCR copy is read from and written back to FpscrQc.
2697 vqrshrunsCode = '''
2698 FPSCR fpscr = (FPSCR) FpscrQc;
2699 if (imm > sizeof(srcElem1) * 8) {
2700 if (srcElem1 != 0)
2701 fpscr.qc = 1;
2702 destElem = 0;
2703 } else if (imm) {
2704 BigElement mid = (srcElem1 >> (imm - 1));
2705 uint64_t rBit = mid & 0x1;
2706 mid >>= 1;
2707 mid |= -(mid & ((BigElement)1 <<
2708 (sizeof(BigElement) * 8 - 1 - imm)));
2709 mid += rBit;
2710 if (bits(mid, sizeof(BigElement) * 8 - 1,
2711 sizeof(Element) * 8) != 0) {
2712 if (srcElem1 < 0) {
2713 destElem = 0;
2714 } else {
2715 destElem = mask(sizeof(Element) * 8);
2716 }
2717 fpscr.qc = 1;
2718 } else {
2719 destElem = mid;
2720 }
2721 } else {
2722 if (srcElem1 < 0) {
2723 fpscr.qc = 1;
2724 destElem = 0;
2725 } else {
2726 destElem = srcElem1;
2727 }
2728 }
2729 FpscrQc = fpscr;
2730 '''
2731 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2732 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2733
# VSHLL: widening left shift by immediate.  The source is cast to BigElement
# before shifting; a shift of at least the destination width produces 0.
vshllCode = '''
    if (imm < sizeof(destElem) * 8) {
        destElem = (BigElement)srcElem1 << imm;
    } else {
        destElem = 0;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes,
                    vshllCode)
2742
# VMOVL: the source element is assigned to the destination element unchanged;
# it is registered through the long-shift generator.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes,
                    vmovlCode)
2747
# VCVT float -> fixed point via vfpFpSToFixed() with its second argument
# false (the signed variant below passes true) and imm as the last argument.
# fpscr.idc is set when flushToZero(srcElem1) returns true.  The conversion
# runs between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): the empty __asm__ __volatile__ statements with "m"
# constraints presumably stop the compiler from moving the conversion across
# the FP-state setup and teardown - confirm.
2748 vcvt2ufxCode = '''
2749 FPSCR fpscr = (FPSCR) FpscrExc;
2750 if (flushToZero(srcElem1))
2751 fpscr.idc = 1;
2752 VfpSavedState state = prepFpState(VfpRoundNearest);
2753 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755 __asm__ __volatile__("" :: "m" (destReg));
2756 finishVfp(fpscr, state, true);
2757 FpscrExc = fpscr;
2758 '''
2759 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2760 2, vcvt2ufxCode, toInt = True)
2761 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2762 4, vcvt2ufxCode, toInt = True)
2763
# VCVT float -> fixed point via vfpFpSToFixed() with its second argument true
# (the sibling snippet above passes false) and imm as the last argument.
# fpscr.idc is set when flushToZero(srcElem1) returns true.  The conversion
# runs between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): the empty __asm__ __volatile__ statements with "m"
# constraints presumably stop the compiler from moving the conversion across
# the FP-state setup and teardown - confirm.
2764 vcvt2sfxCode = '''
2765 FPSCR fpscr = (FPSCR) FpscrExc;
2766 if (flushToZero(srcElem1))
2767 fpscr.idc = 1;
2768 VfpSavedState state = prepFpState(VfpRoundNearest);
2769 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771 __asm__ __volatile__("" :: "m" (destReg));
2772 finishVfp(fpscr, state, true);
2773 FpscrExc = fpscr;
2774 '''
2775 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2776 2, vcvt2sfxCode, toInt = True)
2777 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2778 4, vcvt2sfxCode, toInt = True)
2779
# VCVT fixed point -> float via vfpUFixedToFpS(), with srcReg1 as the value
# and imm as the last argument.  The conversion runs between
# prepFpState(VfpRoundNearest) and finishVfp(); FPSCR state goes through
# FpscrExc.
# NOTE(review): the empty __asm__ __volatile__ statements presumably act as
# compiler barriers around the conversion - confirm.
2780 vcvtu2fpCode = '''
2781 FPSCR fpscr = (FPSCR) FpscrExc;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785 __asm__ __volatile__("" :: "m" (destElem));
2786 finishVfp(fpscr, state, true);
2787 FpscrExc = fpscr;
2788 '''
2789 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2790 2, vcvtu2fpCode, fromInt = True)
2791 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2792 4, vcvtu2fpCode, fromInt = True)
2793
# VCVT fixed point -> float via vfpSFixedToFpS(), with srcReg1 as the value
# and imm as the last argument.  The conversion runs between
# prepFpState(VfpRoundNearest) and finishVfp(); FPSCR state goes through
# FpscrExc.
# NOTE(review): the empty __asm__ __volatile__ statements presumably act as
# compiler barriers around the conversion - confirm.
2794 vcvts2fpCode = '''
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2801 FpscrExc = fpscr;
2802 '''
2803 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2804 2, vcvts2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvts2fpCode, fromInt = True)
2807
# VCVT through the narrowing generator, registered for uint16_t results.  The
# source element's bit pattern is reinterpreted as a float with bitsToFp(),
# fpscr.idc is set when flushToZero() on that float returns true, and the
# value is converted with vcvtFpSFpH(), which is given fpscr.ahp.  The
# conversion runs between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): presumably a single -> half precision conversion, judging by
# the helper name - confirm.
2808 vcvts2hCode = '''
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811 if (flushToZero(srcFp1))
2812 fpscr.idc = 1;
2813 VfpSavedState state = prepFpState(VfpRoundNearest);
2814 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815 : "m" (srcFp1), "m" (destElem));
2816 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2817 fpscr.ahp, srcFp1);
2818 __asm__ __volatile__("" :: "m" (destElem));
2819 finishVfp(fpscr, state, true);
2820 FpscrExc = fpscr;
2821 '''
2822 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2823
# VCVT through the long generator, registered for uint16_t sources.  The
# source element is converted with vcvtFpHFpS(), which is given fpscr.ahp,
# and the float result is stored as its bit pattern via fpToBits().  The
# conversion runs between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): presumably a half -> single precision conversion, judging by
# the helper name - confirm.
2824 vcvth2sCode = '''
2825 FPSCR fpscr = (FPSCR) FpscrExc;
2826 VfpSavedState state = prepFpState(VfpRoundNearest);
2827 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828 : "m" (srcElem1), "m" (destElem));
2829 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830 __asm__ __volatile__("" :: "m" (destElem));
2831 finishVfp(fpscr, state, true);
2832 FpscrExc = fpscr;
2833 '''
2834 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2835
# VRSQRTE on uint32_t elements: the result comes straight from
# unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",),
               2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",),
               4, vrsqrteCode)
2841
# Floating-point VRSQRTE: fpscr.idc is set when flushToZero(srcReg1) returns
# true, then destReg is produced by fprSqrtEstimate().  FPSCR state goes
# through FpscrExc.
2842 vrsqrtefpCode = '''
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 if (flushToZero(srcReg1))
2845 fpscr.idc = 1;
2846 destReg = fprSqrtEstimate(fpscr, srcReg1);
2847 FpscrExc = fpscr;
2848 '''
2849 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2851
# VRECPE on uint32_t elements: the result comes straight from
# unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",),
               2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",),
               4, vrecpeCode)
2857
# Floating-point VRECPE: fpscr.idc is set when flushToZero(srcReg1) returns
# true, then destReg is produced by fpRecipEstimate().  FPSCR state goes
# through FpscrExc.
2858 vrecpefpCode = '''
2859 FPSCR fpscr = (FPSCR) FpscrExc;
2860 if (flushToZero(srcReg1))
2861 fpscr.idc = 1;
2862 destReg = fpRecipEstimate(fpscr, srcReg1);
2863 FpscrExc = fpscr;
2864 '''
2865 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2867
# VREV16: the element value is copied unchanged; only the destination index j
# is changed.  XOR-ing the source index i with (elements per 2-byte group
# - 1) mirrors the element's position inside its group.
vrev16Code = '''
    const unsigned elemsPerGroup = (1 << 1) / sizeof(Element);
    destElem = srcElem1;
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",),
               2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",),
               4, vrev16Code)
# VREV32: the element value is copied unchanged; only the destination index j
# is changed.  XOR-ing the source index i with (elements per 4-byte group
# - 1) mirrors the element's position inside its group.
vrev32Code = '''
    const unsigned elemsPerGroup = (1 << 2) / sizeof(Element);
    destElem = srcElem1;
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev32", "NVrev32D", "SimdAluOp",
               ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q", "SimdAluOp",
               ("uint8_t", "uint16_t"), 4, vrev32Code)
# VREV64: the element value is copied unchanged; only the destination index j
# is changed.  XOR-ing the source index i with (elements per 8-byte group
# - 1) mirrors the element's position inside its group.
vrev64Code = '''
    const unsigned elemsPerGroup = (1 << 3) / sizeof(Element);
    destElem = srcElem1;
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes,
               2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes,
               4, vrev64Code)
2894
# VPADDL: the two source elements handed in by the condensing generator are
# widened to BigElement and summed.
vpaddlCode = '''
    const BigElement pairSum = (BigElement)srcElem1 + (BigElement)srcElem2;
    destElem = pairSum;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes,
                   2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes,
                   4, vpaddlCode)
2900
# VPADAL: the two source elements are widened to BigElement, summed, and the
# sum is added to the existing destination element.
vpadalCode = '''
    const BigElement pairSum = (BigElement)srcElem1 + (BigElement)srcElem2;
    destElem += pairSum;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes,
                   2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes,
                   4, vpadalCode, True)
2906
# VCLS: count how many bits directly below the sign bit repeat it.  The sign
# of the original value is recorded, the value is shifted left once to drop
# the sign bit, and the loop keeps shifting while the new top bit still
# matches the recorded sign.  The count is capped at (element width - 1).
vclsCode = '''
    const bool startsNegative = srcElem1 < 0;
    const unsigned maxCount = sizeof(Element) * 8 - 1;
    unsigned signRun = 0;
    srcElem1 <<= 1;
    while ((srcElem1 < 0) == startsNegative && signRun < maxCount) {
        ++signRun;
        srcElem1 <<= 1;
    }
    destElem = signRun;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes,
               2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes,
               4, vclsCode)
2926
# VCLZ: count leading zero bits.  The element is instantiated with signed
# types, so "top bit clear" is tested as srcElem1 >= 0.  The value is shifted
# left until the top bit is set or the full element width has been counted.
vclzCode = '''
    unsigned leadingZeros = 0;
    for (; srcElem1 >= 0 && leadingZeros < sizeof(Element) * 8;
         srcElem1 <<= 1) {
        leadingZeros++;
    }
    destElem = leadingZeros;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes,
               2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes,
               4, vclzCode)
2937
# VCNT: population count.  The low bit is added to the running total and the
# value shifted right until it becomes zero (or the total reaches the element
# width).
vcntCode = '''
    unsigned setBits = 0;
    for (; srcElem1 && setBits < sizeof(Element) * 8; srcElem1 >>= 1) {
        setBits += srcElem1 & 0x1;
    }
    destElem = setBits;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes,
               2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes,
               4, vcntCode)
2949
# VMVN: bitwise NOT of the source element.  Registered for uint64_t elements
# only.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",),
               2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",),
               4, vmvnCode)
2955
# VQABS: saturating absolute value.  A source equal to the pattern with only
# the top Element bit set cannot be negated, so the result becomes ~srcElem1
# and fpscr.qc is set.  Other negative sources are negated and non-negative
# sources are copied.  The FPSCR copy is read from and written back to
# FpscrQc.
2956 vqabsCode = '''
2957 FPSCR fpscr = (FPSCR) FpscrQc;
2958 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2959 fpscr.qc = 1;
2960 destElem = ~srcElem1;
2961 } else if (srcElem1 < 0) {
2962 destElem = -srcElem1;
2963 } else {
2964 destElem = srcElem1;
2965 }
2966 FpscrQc = fpscr;
2967 '''
2968 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2970
# VQNEG: saturating negate.  A source equal to the pattern with only the top
# Element bit set yields ~srcElem1 and sets fpscr.qc; every other source is
# negated.  The FPSCR copy is read from and written back to FpscrQc.
2971 vqnegCode = '''
2972 FPSCR fpscr = (FPSCR) FpscrQc;
2973 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974 fpscr.qc = 1;
2975 destElem = ~srcElem1;
2976 } else {
2977 destElem = -srcElem1;
2978 }
2979 FpscrQc = fpscr;
2980 '''
2981 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2983
# VABS (integer): negate negative sources and copy the rest.  There is no
# saturation handling here.
vabsCode = '''
    destElem = (srcElem1 < 0) ? -srcElem1 : srcElem1;
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes,
               2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes,
               4, vabsCode)
2994 vabsfpCode = '''
2995 union
2996 {
2997 uint32_t i;
2998 float f;
2999 } cStruct;
3000 cStruct.f = srcReg1;
3001 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3002 destReg = cStruct.f;
3003 '''
3004 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3005 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3006
3007 vnegCode = '''
3008 destElem = -srcElem1;
3009 '''
3010 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3011 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3012 vnegfpCode = '''
3013 destReg = -srcReg1;
3014 '''
3015 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3017
# Compare-against-zero family (VCGT, VCGE, VCEQ, VCLE, VCLT with an
# implicit zero operand).  The five integer snippets differ only in the
# relational operator and the five floating point snippets only in the
# comparison helper handed to binaryOp, so both are stamped out of a
# shared template.  Every per-instruction *Code name is still defined.
_intZeroCmp = 'destElem = (srcElem1 %s 0) ? mask(sizeof(Element) * 8) : 0;'
_fpZeroCmp = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, %s,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''

vcgtCode = _intZeroCmp % ">"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInst("vcgt", "NVcgt" + _suffix, "SimdCmpOp",
                   signedTypes, _regs, vcgtCode)
vcgtfpCode = _fpZeroCmp % "vcgtFunc"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInstFp("vcgt", "NVcgt" + _suffix + "Fp", "SimdFloatCmpOp",
                     ("float",), _regs, vcgtfpCode, toInt = True)

vcgeCode = _intZeroCmp % ">="
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInst("vcge", "NVcge" + _suffix, "SimdCmpOp",
                   signedTypes, _regs, vcgeCode)
vcgefpCode = _fpZeroCmp % "vcgeFunc"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInstFp("vcge", "NVcge" + _suffix + "Fp", "SimdFloatCmpOp",
                     ("float",), _regs, vcgefpCode, toInt = True)

vceqCode = _intZeroCmp % "=="
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInst("vceq", "NVceq" + _suffix, "SimdCmpOp",
                   signedTypes, _regs, vceqCode)
vceqfpCode = _fpZeroCmp % "vceqFunc"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInstFp("vceq", "NVceq" + _suffix + "Fp", "SimdFloatCmpOp",
                     ("float",), _regs, vceqfpCode, toInt = True)

vcleCode = _intZeroCmp % "<="
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInst("vcle", "NVcle" + _suffix, "SimdCmpOp",
                   signedTypes, _regs, vcleCode)
vclefpCode = _fpZeroCmp % "vcleFunc"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInstFp("vcle", "NVcle" + _suffix + "Fp", "SimdFloatCmpOp",
                     ("float",), _regs, vclefpCode, toInt = True)

vcltCode = _intZeroCmp % "<"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInst("vclt", "NVclt" + _suffix, "SimdCmpOp",
                   signedTypes, _regs, vcltCode)
vcltfpCode = _fpZeroCmp % "vcltFunc"
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscInstFp("vclt", "NVclt" + _suffix + "Fp", "SimdFloatCmpOp",
                     ("float",), _regs, vcltfpCode, toInt = True)
3102
# VSWP: exchange the two operands one register at a time.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscScramble("vswp", "NVswp" + _suffix, "SimdAluOp",
                       ("uint64_t",), _regs, vswpCode)

# VTRN: for each element pair, swap the even-indexed element of the
# source operand with the odd-indexed element of the destination.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscScramble("vtrn", "NVtrn" + _suffix, "SimdAluOp",
                       unsignedTypes, _regs, vtrnCode)

# VUZP: de-interleave.  A copy of the source operand is kept in `mid`
# because the source is overwritten before all of it has been read.
# The destination collects the even-indexed elements of both operands,
# the source collects the odd-indexed ones.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscScramble("vuzp", "NVuzp" + _suffix, "SimdAluOp",
                       unsignedTypes, _regs, vuzpCode)
3139
# VZIP: interleave the elements of the two operands.  A copy of the
# destination operand is kept in `mid` because the destination is
# overwritten first.  The second loop reads srcReg1 at an index that is
# never lower than the ones it has already written, so it can work in
# place.  Both loop counters are unsigned so that they match eCount's
# use in the sibling snippets (vtrn/vuzp) and avoid a mixed
# signed/unsigned comparison in the generated C++.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3154
# VMOVN: plain narrowing move; the assignment to the narrower
# destination element performs the truncation.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp",
                     smallUnsignedTypes, vmovnCode)

# VDUP (scalar): twoRegMiscScInst reads the element selected by the
# immediate on every iteration, so copying it fills the destination.
vdupCode = 'destElem = srcElem1;'
for _suffix, _regs in (("D", 2), ("Q", 4)):
    twoRegMiscScInst("vdup", "NVdup" + _suffix, "SimdAluOp",
                     smallUnsignedTypes, _regs, vdupCode)
3161
def vdupGprInst(name, Name, opClass, types, rCount):
    '''Generate a VDUP that broadcasts the integer operand Op1.

    The emitted code casts Op1 to the element type, stores it in every
    element of a RegVect and then writes rCount destination registers.

    name    -- mnemonic passed to InstObjParams
    Name    -- class name of the generated instruction
    opClass -- value for the "op_class" template field
    types   -- element types to instantiate the execute method for
    rCount  -- number of destination registers written
    '''
    global header_output, exec_output
    # Start with the SIMD-enabled check like every other NEON generator
    # in this file; without it this form would execute even when the
    # check would have raised a fault.
    eWalkCode = simdEnabledCheckCode + '''
        RegVect destReg;
        for (unsigned i = 0; i < eCount; i++) {
            destReg.elements[i] = htog((Element)Op1);
        }
    '''
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
# VDUP from the integer operand, double- and quad-word forms.
for _suffix, _regs in (("D", 2), ("Q", 4)):
    vdupGprInst("vdup", "NVdup" + _suffix + "Gpr", "SimdAluOp",
                smallUnsignedTypes, _regs)

# One-register-plus-immediate operations.  VORR and VBIC pass True for
# readDest because they update the existing destination contents.
vmovCode = 'destElem = imm;'
for _suffix, _regs in (("D", 2), ("Q", 4)):
    oneRegImmInst("vmov", "NVmovi" + _suffix, "SimdMiscOp",
                  ("uint64_t",), _regs, vmovCode)

vorrCode = 'destElem |= imm;'
for _suffix, _regs in (("D", 2), ("Q", 4)):
    oneRegImmInst("vorr", "NVorri" + _suffix, "SimdAluOp",
                  ("uint64_t",), _regs, vorrCode, True)

vmvnCode = 'destElem = ~imm;'
for _suffix, _regs in (("D", 2), ("Q", 4)):
    oneRegImmInst("vmvn", "NVmvni" + _suffix, "SimdAluOp",
                  ("uint64_t",), _regs, vmvnCode)

vbicCode = 'destElem &= ~imm;'
for _suffix, _regs in (("D", 2), ("Q", 4)):
    oneRegImmInst("vbic", "NVbici" + _suffix, "SimdAluOp",
                  ("uint64_t",), _regs, vbicCode, True)
3204
# VQMOVN (signed): narrow, and if widening the result back does not
# reproduce the source, set QC and clamp to the all-ones-below-the-top-
# bit pattern, complemented for negative sources.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp",
                     smallSignedTypes, vqmovnCode)

# VQMOVUN on unsigned element types: on a lossy narrowing set QC and
# clamp to all ones.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun", "SimdMiscOp",
                     smallUnsignedTypes, vqmovunCode)

# VQMOVUN on signed element types: negative sources, or sources whose
# low bits do not reproduce the value, set QC.  The clamp value is all
# ones, complemented (i.e. zero) for negative sources.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns", "SimdMiscOp",
                     smallSignedTypes, vqmovunsCode)
3244
def buildVext(name, Name, opClass, types, rCount, op):
    '''Generate a three-register NEON instruction with an immediate.

    The emitted code loads rCount registers of each of the two source
    operands into srcReg1/srcReg2, runs `op` verbatim (it is expected
    to fill destReg itself) and writes rCount destination registers.

    name    -- mnemonic passed to InstObjParams
    Name    -- class name of the generated instruction
    opClass -- value for the "op_class" template field
    types   -- element types to instantiate the execute method for
    rCount  -- number of registers in each operand
    op      -- C++ snippet inserted between the loads and the stores
    '''
    global header_output, exec_output
    # Emit the SIMD-enabled check exactly once, ahead of everything
    # else, instead of repeating it in front of every register load.
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
3272
3273 vextCode = '''
3274 for (unsigned i = 0; i < eCount; i++) {
3275 unsigned index = i + imm;
3276 if (index < eCount) {
3277 destReg.elements[i] = srcReg1.elements[index];
3278 } else {
3279 index -= eCount;
997 }
998 ''' % { "op" : op, "readDest" : readDestCode }
999 for reg in range(rCount):
1000 eWalkCode += '''
1001 FpDestP%(reg)d = destRegs[%(reg)d];
1002 ''' % { "reg" : reg }
1003 iop = InstObjParams(name, Name,
1004 "FpRegRegRegImmOp",
1005 { "code": eWalkCode,
1006 "r_count": rCount,
1007 "predicate_test": predicateTest,
1008 "op_class": opClass }, [])
1009 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1010 exec_output += NeonEqualRegExecute.subst(iop)
1011 for type in types:
1012 substDict = { "targs" : type,
1013 "class_name" : Name }
1014 exec_output += NeonExecDeclare.subst(substDict)
1015
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    '''Generate a two-register NEON instruction that takes an immediate.

    The emitted code loads rCount source registers, runs `op` once per
    element and writes rCount destination registers.

    readDest -- also load the destination registers and seed the
                per-iteration destination variable from them
    toInt    -- the destination variable is a FloatRegBits named
                destReg (whole register) rather than Element destElem
    fromInt  -- the source variable is a FloatRegBits named srcReg1
                (whole register) rather than Element srcElem1
    '''
    global header_output, exec_output

    # Choose the per-iteration snippets before assembling anything.
    if fromInt:
        loadSrc = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        loadSrc = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    if toInt:
        destDecl = 'FloatRegBits destReg;'
        storeDest = 'destRegs.regs[i] = htog(destReg);'
    else:
        destDecl = 'Element destElem;'
        storeDest = 'destRegs.elements[i] = htog(destElem);'

    if not readDest:
        seedDest = ''
    elif toInt:
        seedDest = 'destReg = gtoh(destRegs.regs[i]);'
    else:
        seedDest = 'destElem = gtoh(destRegs.elements[i]);'

    chunks = [simdEnabledCheckCode, '''
        RegVect srcRegs1, destRegs;
    ''']
    for reg in range(rCount):
        regArgs = { "reg" : reg }
        chunks.append('''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs)
        if readDest:
            chunks.append('''
        destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % regArgs)
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
    ''' % dict(readOp=loadSrc, declDest=destDecl, readDest=seedDest,
               op=op, writeDest=storeDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(rCount))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": rCount,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", templateFields, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1072
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a narrowing two-register NEON instruction with an immediate.

    The emitted code loads four source registers into a BigRegVect,
    runs `op` once per element with a BigElement srcElem1 and an
    Element destElem, and writes two destination registers.  With
    readDest set, the two destination registers are loaded first and
    destElem is seeded from them before `op` runs.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        BigRegVect srcReg1;
        RegVect destReg;
    ''']
    chunks.extend('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg } for reg in range(4))
    if readDest:
        chunks.extend('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg } for reg in range(2))
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(2))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": 2,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", templateFields, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1116
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a widening two-register NEON instruction with an immediate.

    The emitted code loads two source registers into a RegVect, runs
    `op` once per element with an Element srcElem1 and a BigElement
    destElem, and writes four destination registers.

    readDest -- also load the four destination registers and seed
                destElem from the matching destination element before
                `op` runs
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element variable that `op` works on.  Assigning
        # to destReg here would target the whole register vector
        # declared above instead of the element.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1160
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate an element-wise two-register NEON instruction.

    The emitted code loads rCount source registers, runs `op` once per
    element with Element srcElem1/destElem in scope, and writes rCount
    destination registers.  The result is stored at index `j`, which
    starts out equal to the loop index `i`; `op` may change `j` to
    redirect the store.  With readDest set, the destination registers
    are loaded as well and destElem is seeded from element `i`.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        RegVect srcReg1, destReg;
    ''']
    for reg in range(rCount):
        regArgs = { "reg" : reg }
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs)
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % regArgs)
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned j = i;
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[j] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(rCount))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": rCount,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", templateFields, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1203
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a two-register NEON instruction with a scalar source.

    The emitted code loads rCount source registers and, on every
    iteration, reads the single source element selected by `imm` into
    srcElem1 before running `op`; the result is stored to element `i`
    of the destination.  With readDest set, the destination registers
    are loaded as well and destElem is seeded from element `i`.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        RegVect srcReg1, destReg;
    ''']
    for reg in range(rCount):
        regArgs = { "reg" : reg }
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs)
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % regArgs)
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(rCount))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": rCount,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", templateFields, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1245
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction that rewrites both of its operands.

    The emitted code loads rCount registers of the source operand AND
    of the destination operand, runs `op` verbatim (it works on
    srcReg1/destReg directly; there is no per-element loop here), and
    then writes both register groups back.

    readDest -- accepted so the signature matches the sibling
                generators; it has no effect because the destination
                registers are always loaded
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Both operands may have been modified, so both are written back.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1280
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    '''Generate a register-wise two-register floating point NEON instruction.

    The emitted code copies rCount source registers into a FloatReg
    array, runs `op` once per register with FloatReg srcReg1 and a
    destination variable destReg in scope, and writes rCount
    destination registers.

    readDest -- also load the destination registers and seed destReg
                from register `r` before `op` runs
    toInt    -- the destination is a RegVect accessed through .regs and
                destReg is a FloatRegBits; otherwise the destination is
                a FloatReg array and destReg is a FloatReg
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    # The generated loop below counts with `r`, and in the toInt case
    # destRegs is a RegVect, so the seed must mirror the indexing used
    # by writeDest.
    readDestCode = ''
    if readDest:
        if toInt:
            readDestCode = 'destReg = destRegs.regs[r];'
        else:
            readDestCode = 'destReg = destRegs[r];'
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1346
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction that folds adjacent element pairs.

    The emitted code loads rCount source registers, then for each of
    the eCount / 2 pairs exposes the two neighbouring source elements
    as srcElem1/srcElem2, runs `op`, and stores the BigElement destElem
    into the destination vector.  rCount destination registers are
    written.  With readDest set, the destination registers are loaded
    as well and destElem is seeded from element `i`.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        RegVect srcRegs;
        BigRegVect destReg;
    ''']
    for reg in range(rCount):
        regArgs = { "reg" : reg }
        chunks.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs)
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % regArgs)
    chunks.append('''
        for (unsigned i = 0; i < eCount / 2; i++) {
            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(rCount))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": rCount,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", templateFields, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1390
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a narrowing two-register NEON instruction.

    The emitted code loads four source registers into a BigRegVect,
    runs `op` once per element with a BigElement srcElem1 and an
    Element destElem, and writes two destination registers.  With
    readDest set, the two destination registers are loaded first and
    destElem is seeded from them before `op` runs.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        BigRegVect srcReg1;
        RegVect destReg;
    ''']
    chunks.extend('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg } for reg in range(4))
    if readDest:
        chunks.extend('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg } for reg in range(2))
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(2))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": 2,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", templateFields, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1434
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction with one register and an immediate.

    The emitted code runs `op` once per element with only Element
    destElem in scope (the snippets passed in refer to `imm`), then
    writes rCount destination registers.  With readDest set, the
    destination registers are loaded first and destElem is seeded from
    element `i`, which lets `op` update the existing contents.
    '''
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode, '''
        RegVect destReg;
    ''']
    if readDest:
        chunks.extend('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg } for reg in range(rCount))
    chunks.append('''
        for (unsigned i = 0; i < eCount; i++) {
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % dict(op=op, readDest=seedDest))
    chunks.extend('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg } for reg in range(rCount))
    eWalkCode = ''.join(chunks)

    templateFields = { "code": eWalkCode,
                       "r_count": rCount,
                       "predicate_test": predicateTest,
                       "op_class": opClass }
    iop = InstObjParams(name, Name, "RegImmOp", templateFields, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1472
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a widening two-register NEON instruction.

    The emitted code loads two source registers into a RegVect, runs
    `op` once per element with an Element srcElem1 and a BigElement
    destElem, and writes four destination registers.

    readDest -- also load the four destination registers and seed
                destElem from the matching destination element before
                `op` runs
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element variable that `op` works on.  Assigning
        # to destReg here would target the whole register vector
        # declared above instead of the element.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per supported element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1516
# Halving add/subtract.  Each snippet halves the two inputs separately
# (with their low bits masked off) and then corrects the result with
# the carry or borrow produced by the discarded low bits.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + _suffix, "SimdAddOp",
                      allTypes, _regs, vhaddCode)

# Rounding variant: an extra 1 is added to the low bits before the
# carry is extracted.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + _suffix, "SimdAddOp",
                      allTypes, _regs, vrhaddCode)

vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + _suffix, "SimdAddOp",
                      allTypes, _regs, vhsubCode)
1554
# Bitwise three-register operations.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vorrCode)

# The register-to-register VMOV classes reuse the VORR snippet and
# differ only in mnemonic, class name and op class.
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + _suffix, "SimdMiscOp",
                      unsignedTypes, _regs, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, veorCode)

# Bitwise selects.  All three read the old destination value, hence the
# trailing True passed to the generator.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + _suffix, "SimdAluOp",
                      unsignedTypes, _regs, vbslCode, True)
1603
# Element-wise maximum and minimum over all integer element types.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + _suffix, "SimdCmpOp",
                      allTypes, _regs, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + _suffix, "SimdCmpOp",
                      allTypes, _regs, vminCode)
1615
# vadd: plain per-element addition. The snippet is also used for vpadd, which
# passes pairwise=True to the helper.
1616 vaddCode = '''
1617 destElem = srcElem1 + srcElem2;
1618 '''
1619 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1620 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
1622 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1623 2, vaddCode, pairwise=True)
1624 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1625 4, vaddCode, pairwise=True)
# vaddl / vaddw: both sources are cast to BigElement before the addition.
# One snippet serves the "long" and the "wide" template helpers.
1626 vaddlwCode = '''
1627 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1628 '''
1629 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1630 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: add in BigElement precision, then shift the sum right by
# sizeof(Element) * 8 bits before storing it.
1631 vaddhnCode = '''
1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1633 (sizeof(Element) * 8);
1634 '''
1635 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but 1 << (sizeof(Element) * 8 - 1) is added to the sum
# before the right shift (a rounding constant of half the discarded range).
1636 vraddhnCode = '''
1637 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1638 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1639 (sizeof(Element) * 8);
1640 '''
1641 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1642
# vsub: plain per-element subtraction (srcElem1 - srcElem2).
1643 vsubCode = '''
1644 destElem = srcElem1 - srcElem2;
1645 '''
1646 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1647 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both sources are cast to BigElement before the subtraction.
# One snippet serves the "long" and the "wide" template helpers.
1648 vsublwCode = '''
1649 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1650 '''
1651 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1652 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1653
# vqadd (unsigned): add, then detect wrap-around by checking whether the sum
# is smaller than either operand. On wrap the result is clamped to
# (Element)(-1) and fpscr.qc is set. FpscrQc is read at the start and written
# back at the end whether or not saturation happened.
1654 vqaddUCode = '''
1655 destElem = srcElem1 + srcElem2;
1656 FPSCR fpscr = (FPSCR) FpscrQc;
1657 if (destElem < srcElem1 || destElem < srcElem2) {
1658 destElem = (Element)(-1);
1659 fpscr.qc = 1;
1660 }
1661 FpscrQc = fpscr;
1662 '''
1663 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1664 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: subtract in BigElement precision, then shift the difference right
# by sizeof(Element) * 8 bits before storing it.
1665 vsubhnCode = '''
1666 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1667 (sizeof(Element) * 8);
1668 '''
1669 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but 1 << (sizeof(Element) * 8 - 1) is added to the
# difference before the right shift.
1670 vrsubhnCode = '''
1671 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1672 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1673 (sizeof(Element) * 8);
1674 '''
1675 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1676
# vqadd (signed): add, then flag overflow when both sources have the same sign
# and the sum's sign differs from it. On overflow destElem is set to
# 1 << (bits - 1); if the wrapped sum was negative, 1 is subtracted from that
# value. fpscr.qc is set on overflow and FpscrQc is always written back.
1677 vqaddSCode = '''
1678 destElem = srcElem1 + srcElem2;
1679 FPSCR fpscr = (FPSCR) FpscrQc;
1680 bool negDest = (destElem < 0);
1681 bool negSrc1 = (srcElem1 < 0);
1682 bool negSrc2 = (srcElem2 < 0);
1683 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1684 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1685 if (negDest)
1686 destElem -= 1;
1687 fpscr.qc = 1;
1688 }
1689 FpscrQc = fpscr;
1690 '''
1691 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1692 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1693
# vqsub (unsigned): subtract, then detect borrow by checking whether the
# difference is larger than the minuend. On borrow the result is clamped to 0
# and fpscr.qc is set.
1694 vqsubUCode = '''
1695 destElem = srcElem1 - srcElem2;
1696 FPSCR fpscr = (FPSCR) FpscrQc;
1697 if (destElem > srcElem1) {
1698 destElem = 0;
1699 fpscr.qc = 1;
1700 }
1701 FpscrQc = fpscr;
1702 '''
1703 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1704 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1705
# vqsub (signed): subtract, then flag overflow when the difference's sign
# differs from srcElem1's sign and (srcElem1 < 0) equals (srcElem2 >= 0).
# Saturation value and qc handling match the signed vqadd snippet.
1706 vqsubSCode = '''
1707 destElem = srcElem1 - srcElem2;
1708 FPSCR fpscr = (FPSCR) FpscrQc;
1709 bool negDest = (destElem < 0);
1710 bool negSrc1 = (srcElem1 < 0);
1711 bool posSrc2 = (srcElem2 >= 0);
1712 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1713 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1714 if (negDest)
1715 destElem -= 1;
1716 fpscr.qc = 1;
1717 }
1718 FpscrQc = fpscr;
1719 '''
1720 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1721 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1722
# Integer compares. Each writes (Element)(-1) (all bits set) when the
# condition holds and 0 otherwise.
#
# vcgt: srcElem1 > srcElem2.
1723 vcgtCode = '''
1724 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1725 '''
1726 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1727 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1728
# vcge: srcElem1 >= srcElem2.
1729 vcgeCode = '''
1730 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1731 '''
1732 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1733 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1734
# vceq: srcElem1 == srcElem2. Instantiated for unsignedTypes only, unlike the
# two ordered compares which use allTypes.
1735 vceqCode = '''
1736 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1737 '''
1738 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1739 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1740
# vshl (register): the shift amount is srcElem2 truncated to a signed 8-bit
# value. A negative amount shifts right by its magnitude, a non-negative one
# shifts left. Shifts of the full element width or more produce 0; for the
# right-shift case the amount is then clamped to width - 1 so the sign
# fix-up below fills every bit when srcElem1 is negative.
# NOTE: the name vshlCode is rebound later in the file for the immediate
# form; these calls capture the register form bound here.
1741 vshlCode = '''
1742 int16_t shiftAmt = (int8_t)srcElem2;
1743 if (shiftAmt < 0) {
1744 shiftAmt = -shiftAmt;
1745 if (shiftAmt >= sizeof(Element) * 8) {
1746 shiftAmt = sizeof(Element) * 8 - 1;
1747 destElem = 0;
1748 } else {
1749 destElem = (srcElem1 >> shiftAmt);
1750 }
1751 // Make sure the right shift sign extended when it should.
1752 if (ltz(srcElem1) && !ltz(destElem)) {
1753 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1754 1 - shiftAmt));
1755 }
1756 } else {
1757 if (shiftAmt >= sizeof(Element) * 8) {
1758 destElem = 0;
1759 } else {
1760 destElem = srcElem1 << shiftAmt;
1761 }
1762 }
1763 '''
1764 threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
1765 threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)
1766
# vrshl (register, rounding): same shift-amount decoding as the register vshl
# snippet. For right shifts a rounding bit rBit is added to the shifted value:
#   - amount <= element width: rBit is bit (amount - 1) of srcElem1;
#   - amount >  element width and srcElem1 negative: rBit is forced to 1.
# A zero shift amount copies srcElem1 unchanged.
1767 vrshlCode = '''
1768 int16_t shiftAmt = (int8_t)srcElem2;
1769 if (shiftAmt < 0) {
1770 shiftAmt = -shiftAmt;
1771 Element rBit = 0;
1772 if (shiftAmt <= sizeof(Element) * 8)
1773 rBit = bits(srcElem1, shiftAmt - 1);
1774 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1775 rBit = 1;
1776 if (shiftAmt >= sizeof(Element) * 8) {
1777 shiftAmt = sizeof(Element) * 8 - 1;
1778 destElem = 0;
1779 } else {
1780 destElem = (srcElem1 >> shiftAmt);
1781 }
1782 // Make sure the right shift sign extended when it should.
1783 if (ltz(srcElem1) && !ltz(destElem)) {
1784 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1785 1 - shiftAmt));
1786 }
1787 destElem += rBit;
1788 } else if (shiftAmt > 0) {
1789 if (shiftAmt >= sizeof(Element) * 8) {
1790 destElem = 0;
1791 } else {
1792 destElem = srcElem1 << shiftAmt;
1793 }
1794 } else {
1795 destElem = srcElem1;
1796 }
1797 '''
1798 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1799 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1800
# vqshl (register, unsigned saturating): the shift amount is srcElem2
# truncated to a signed 8-bit value.
#   - negative: logical right shift by the magnitude (0 if >= element width);
#   - positive: left shift; if any bit would be shifted out (or the amount is
#     >= element width and srcElem1 is non-zero) the result saturates to
#     mask(element width) and fpscr.qc is set;
#   - zero: srcElem1 is copied unchanged.
1801 vqshlUCode = '''
1802 int16_t shiftAmt = (int8_t)srcElem2;
1803 FPSCR fpscr = (FPSCR) FpscrQc;
1804 if (shiftAmt < 0) {
1805 shiftAmt = -shiftAmt;
1806 if (shiftAmt >= sizeof(Element) * 8) {
1807 shiftAmt = sizeof(Element) * 8 - 1;
1808 destElem = 0;
1809 } else {
1810 destElem = (srcElem1 >> shiftAmt);
1811 }
1812 } else if (shiftAmt > 0) {
1813 if (shiftAmt >= sizeof(Element) * 8) {
1814 if (srcElem1 != 0) {
1815 destElem = mask(sizeof(Element) * 8);
1816 fpscr.qc = 1;
1817 } else {
1818 destElem = 0;
1819 }
1820 } else {
1821 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1822 sizeof(Element) * 8 - shiftAmt)) {
1823 destElem = mask(sizeof(Element) * 8);
1824 fpscr.qc = 1;
1825 } else {
1826 destElem = srcElem1 << shiftAmt;
1827 }
1828 }
1829 } else {
1830 destElem = srcElem1;
1831 }
1832 FpscrQc = fpscr;
1833 '''
1834 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1835 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1836
# vqshl (register, signed saturating): the shift amount is srcElem2 truncated
# to a signed 8-bit value.
#   - negative: right shift by the magnitude with an explicit sign fix-up;
#   - positive: left shift; saturates when the top (amount + 1) bits of
#     srcElem1 are not all copies of the sign, or when the amount is >= the
#     element width and srcElem1 is non-zero. The saturated value is
#     mask(width - 1), complemented when srcElem1 is negative; fpscr.qc is set;
#   - zero: srcElem1 is copied unchanged.
# NOTE(review): these classes are registered as "SimdCmpOp" while the unsigned
# vqshl classes use "SimdAluOp" -- confirm that the difference is intended.
1837 vqshlSCode = '''
1838 int16_t shiftAmt = (int8_t)srcElem2;
1839 FPSCR fpscr = (FPSCR) FpscrQc;
1840 if (shiftAmt < 0) {
1841 shiftAmt = -shiftAmt;
1842 if (shiftAmt >= sizeof(Element) * 8) {
1843 shiftAmt = sizeof(Element) * 8 - 1;
1844 destElem = 0;
1845 } else {
1846 destElem = (srcElem1 >> shiftAmt);
1847 }
1848 // Make sure the right shift sign extended when it should.
1849 if (srcElem1 < 0 && destElem >= 0) {
1850 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1851 1 - shiftAmt));
1852 }
1853 } else if (shiftAmt > 0) {
1854 bool sat = false;
1855 if (shiftAmt >= sizeof(Element) * 8) {
1856 if (srcElem1 != 0)
1857 sat = true;
1858 else
1859 destElem = 0;
1860 } else {
1861 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1862 sizeof(Element) * 8 - 1 - shiftAmt) !=
1863 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1864 sat = true;
1865 } else {
1866 destElem = srcElem1 << shiftAmt;
1867 }
1868 }
1869 if (sat) {
1870 fpscr.qc = 1;
1871 destElem = mask(sizeof(Element) * 8 - 1);
1872 if (srcElem1 < 0)
1873 destElem = ~destElem;
1874 }
1875 } else {
1876 destElem = srcElem1;
1877 }
1878 FpscrQc = fpscr;
1879 '''
1880 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1881 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1882
1883 vqrshlUCode = '''
1884 int16_t shiftAmt = (int8_t)srcElem2;
1885 FPSCR fpscr = (FPSCR) FpscrQc;
1886 if (shiftAmt < 0) {
1887 shiftAmt = -shiftAmt;
1888 Element rBit = 0;
1889 if (shiftAmt <= sizeof(Element) * 8)
1890 rBit = bits(srcElem1, shiftAmt - 1);
1891 if (shiftAmt >= sizeof(Element) * 8) {
1892 shiftAmt = sizeof(Element) * 8 - 1;
1893 destElem = 0;
1894 } else {
1895 destElem = (srcElem1 >> shiftAmt);
1896 }
1897 destElem += rBit;
1898 } else {
1899 if (shiftAmt >= sizeof(Element) * 8) {
1900 if (srcElem1 != 0) {
1901 destElem = mask(sizeof(Element) * 8);
1902 fpscr.qc = 1;
1903 } else {
1904 destElem = 0;
1905 }
1906 } else {
1907 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1908 sizeof(Element) * 8 - shiftAmt)) {
1909 destElem = mask(sizeof(Element) * 8);
1910 fpscr.qc = 1;
1911 } else {
1912 destElem = srcElem1 << shiftAmt;
1913 }
1914 }
1915 }
1916 FpscrQc = fpscr;
1917 '''
1918 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1919 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1920
# vqrshl (register, signed saturating, rounding): combines the rounding
# right-shift of vrshl with the saturating left-shift of the signed vqshl
# snippet.
#   - negative amount: right shift with sign fix-up, then add rBit (bit
#     (amount - 1) of srcElem1, or 1 when the amount exceeds the element
#     width and srcElem1 is negative);
#   - positive amount: left shift, saturating to mask(width - 1) or its
#     complement (for negative srcElem1) and setting fpscr.qc;
#   - zero: srcElem1 is copied unchanged.
1921 vqrshlSCode = '''
1922 int16_t shiftAmt = (int8_t)srcElem2;
1923 FPSCR fpscr = (FPSCR) FpscrQc;
1924 if (shiftAmt < 0) {
1925 shiftAmt = -shiftAmt;
1926 Element rBit = 0;
1927 if (shiftAmt <= sizeof(Element) * 8)
1928 rBit = bits(srcElem1, shiftAmt - 1);
1929 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1930 rBit = 1;
1931 if (shiftAmt >= sizeof(Element) * 8) {
1932 shiftAmt = sizeof(Element) * 8 - 1;
1933 destElem = 0;
1934 } else {
1935 destElem = (srcElem1 >> shiftAmt);
1936 }
1937 // Make sure the right shift sign extended when it should.
1938 if (srcElem1 < 0 && destElem >= 0) {
1939 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1940 1 - shiftAmt));
1941 }
1942 destElem += rBit;
1943 } else if (shiftAmt > 0) {
1944 bool sat = false;
1945 if (shiftAmt >= sizeof(Element) * 8) {
1946 if (srcElem1 != 0)
1947 sat = true;
1948 else
1949 destElem = 0;
1950 } else {
1951 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1952 sizeof(Element) * 8 - 1 - shiftAmt) !=
1953 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1954 sat = true;
1955 } else {
1956 destElem = srcElem1 << shiftAmt;
1957 }
1958 }
1959 if (sat) {
1960 fpscr.qc = 1;
1961 destElem = mask(sizeof(Element) * 8 - 1);
1962 if (srcElem1 < 0)
1963 destElem = ~destElem;
1964 }
1965 } else {
1966 destElem = srcElem1;
1967 }
1968 FpscrQc = fpscr;
1969 '''
1970 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1971 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1972
# Absolute-difference family. The difference is always formed as
# (larger - smaller), so no negation is needed.
#
# vaba: accumulate |srcElem1 - srcElem2| into destElem (destElem is read, and
# the helper gets a trailing True).
1973 vabaCode = '''
1974 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1975 (srcElem2 - srcElem1);
1976 '''
1977 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1978 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: as vaba, with both operands cast to BigElement before subtracting.
1979 vabalCode = '''
1980 destElem += (srcElem1 > srcElem2) ?
1981 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1982 ((BigElement)srcElem2 - (BigElement)srcElem1);
1983 '''
1984 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1985
# vabd: destElem = |srcElem1 - srcElem2| (no accumulation).
1986 vabdCode = '''
1987 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1988 (srcElem2 - srcElem1);
1989 '''
1990 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1991 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: as vabd, with both operands cast to BigElement before subtracting.
1992 vabdlCode = '''
1993 destElem = (srcElem1 > srcElem2) ?
1994 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1995 ((BigElement)srcElem2 - (BigElement)srcElem1);
1996 '''
1997 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1998
# vtst: all-ones when the two sources share at least one set bit, else 0.
1999 vtstCode = '''
2000 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2001 '''
2002 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2003 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2004
# vmul: per-element product, stored at element width.
2005 vmulCode = '''
2006 destElem = srcElem1 * srcElem2;
2007 '''
2008 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2009 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: product with both operands cast to BigElement first.
2010 vmullCode = '''
2011 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2012 '''
2013 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2014
# vmla: multiply-accumulate into destElem (destElem is read; trailing True).
2015 vmlaCode = '''
2016 destElem = destElem + srcElem1 * srcElem2;
2017 '''
2018 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2019 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: multiply-accumulate with both operands cast to BigElement first.
2020 vmlalCode = '''
2021 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2022 '''
2023 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2024
# vqdmlal: saturating doubling multiply-accumulate (long).
#   1. midElem = 2 * srcElem1 * srcElem2, computed through int64_t.
#   2. For three specific operand pairs (maxNeg*maxNeg, halfNeg*maxNeg,
#      maxNeg*halfNeg) midElem is replaced by ~(maxNeg << element width) and
#      fpscr.qc is set.
#   3. midElem is added to destElem; if both had the same sign and the sum's
#      sign differs, destElem saturates to mask(BigElement bits - 1), or its
#      complement when the accumulator was negative, and fpscr.qc is set.
# NOTE(review): the two halfNeg pairs are treated as saturating in step 2;
# confirm against the architecture's saturation rule for the doubled product.
2025 vqdmlalCode = '''
2026 FPSCR fpscr = (FPSCR) FpscrQc;
2027 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2028 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2029 Element halfNeg = maxNeg / 2;
2030 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2031 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2032 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2033 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2034 fpscr.qc = 1;
2035 }
2036 bool negPreDest = ltz(destElem);
2037 destElem += midElem;
2038 bool negDest = ltz(destElem);
2039 bool negMid = ltz(midElem);
2040 if (negPreDest == negMid && negMid != negDest) {
2041 destElem = mask(sizeof(BigElement) * 8 - 1);
2042 if (negPreDest)
2043 destElem = ~destElem;
2044 fpscr.qc = 1;
2045 }
2046 FpscrQc = fpscr;
2047 '''
2048 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2049
# vqdmlsl: saturating doubling multiply-subtract (long). Same structure as
# the vqdmlal snippet, except that midElem is subtracted from destElem and
# the overflow test uses the sign of the negated midElem (posMid holds
# ltz(-midElem)).
# NOTE(review): the two halfNeg operand pairs are treated as saturating, as
# in vqdmlal; confirm against the architecture's saturation rule.
2050 vqdmlslCode = '''
2051 FPSCR fpscr = (FPSCR) FpscrQc;
2052 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2053 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2054 Element halfNeg = maxNeg / 2;
2055 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2056 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2057 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2058 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2059 fpscr.qc = 1;
2060 }
2061 bool negPreDest = ltz(destElem);
2062 destElem -= midElem;
2063 bool negDest = ltz(destElem);
2064 bool posMid = ltz((BigElement)-midElem);
2065 if (negPreDest == posMid && posMid != negDest) {
2066 destElem = mask(sizeof(BigElement) * 8 - 1);
2067 if (negPreDest)
2068 destElem = ~destElem;
2069 fpscr.qc = 1;
2070 }
2071 FpscrQc = fpscr;
2072 '''
2073 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2074
# vqdmull: saturating doubling multiply (long). The doubled product is
# computed through int64_t; only when both sources equal
# 1 << (element bits - 1) is the result replaced by
# ~(srcElem1 << element width) with fpscr.qc set.
2075 vqdmullCode = '''
2076 FPSCR fpscr = (FPSCR) FpscrQc;
2077 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2078 if (srcElem1 == srcElem2 &&
2079 srcElem1 == (Element)((Element)1 <<
2080 (Element)(sizeof(Element) * 8 - 1))) {
2081 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2082 fpscr.qc = 1;
2083 }
2084 FpscrQc = fpscr;
2085 '''
2086 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2087
# vmls: multiply-subtract from destElem (destElem is read; trailing True).
2088 vmlsCode = '''
2089 destElem = destElem - srcElem1 * srcElem2;
2090 '''
2091 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2092 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: multiply-subtract with both operands cast to BigElement first.
2093 vmlslCode = '''
2094 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2095 '''
2096 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2097
# vmul (polynomial): carry-less multiply. For every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into the result.
2098 vmulpCode = '''
2099 destElem = 0;
2100 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2101 if (bits(srcElem2, j))
2102 destElem ^= srcElem1 << j;
2103 }
2104 '''
2105 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2106 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# vmull (polynomial): same loop, with srcElem1 cast to BigElement before the
# shift so the shifted partial products are kept at the wider width.
2107 vmullpCode = '''
2108 destElem = 0;
2109 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2110 if (bits(srcElem2, j))
2111 destElem ^= (BigElement)srcElem1 << j;
2112 }
2113 '''
2114 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2115
# vpmax / vpmin: reuse the vmaxCode / vminCode snippets with pairwise=True.
2116 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2117 threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2118
2119 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2120 threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2121
# vqdmulh: saturating doubling multiply returning the high half. The doubled
# product (through int64_t) is shifted right by the element width. When both
# sources equal 1 << (element bits - 1) the result is replaced by ~srcElem1
# and fpscr.qc is set.
2122 vqdmulhCode = '''
2123 FPSCR fpscr = (FPSCR) FpscrQc;
2124 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2125 (sizeof(Element) * 8);
2126 if (srcElem1 == srcElem2 &&
2127 srcElem1 == (Element)((Element)1 <<
2128 (sizeof(Element) * 8 - 1))) {
2129 destElem = ~srcElem1;
2130 fpscr.qc = 1;
2131 }
2132 FpscrQc = fpscr;
2133 '''
2134 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2135 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2136
# vqrdmulh: as vqdmulh, but 1 << (element bits - 1) is added to the doubled
# product before the right shift. For three operand pairs (maxNeg*maxNeg,
# halfNeg*maxNeg, maxNeg*halfNeg) the result is overridden: mask(bits - 1)
# when the computed value is negative, 1 << (bits - 1) otherwise, and
# fpscr.qc is set.
# NOTE(review): the two halfNeg pairs are treated as saturating; confirm
# against the architecture's saturation rule.
2137 vqrdmulhCode = '''
2138 FPSCR fpscr = (FPSCR) FpscrQc;
2139 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2140 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2141 (sizeof(Element) * 8);
2142 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2143 Element halfNeg = maxNeg / 2;
2144 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2145 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2146 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2147 if (destElem < 0) {
2148 destElem = mask(sizeof(Element) * 8 - 1);
2149 } else {
2150 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2151 }
2152 fpscr.qc = 1;
2153 }
2154 FpscrQc = fpscr;
2155 '''
2156 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2157 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2158 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2159 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2160
# vmax (float): processNans() is tried first and reports through `done`
# whether it already produced the result. If not, binaryOp() is run with
# fpMaxS; if it did and flushToZero() returns true for the two sources,
# fpscr.idc is set. FpscrExc is read at the start and written back at the end.
2161 vmaxfpCode = '''
2162 FPSCR fpscr = (FPSCR) FpscrExc;
2163 bool done;
2164 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2165 if (!done) {
2166 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2167 true, true, VfpRoundNearest);
2168 } else if (flushToZero(srcReg1, srcReg2)) {
2169 fpscr.idc = 1;
2170 }
2171 FpscrExc = fpscr;
2172 '''
2173 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2174 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2175
# vmin (float): identical structure to the float vmax snippet, with fpMinS.
2176 vminfpCode = '''
2177 FPSCR fpscr = (FPSCR) FpscrExc;
2178 bool done;
2179 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2180 if (!done) {
2181 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2182 true, true, VfpRoundNearest);
2183 } else if (flushToZero(srcReg1, srcReg2)) {
2184 fpscr.idc = 1;
2185 }
2186 FpscrExc = fpscr;
2187 '''
2188 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2189 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2190
# Float vpmax / vpmin: reuse the two snippets above with pairwise=True.
2191 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2192 2, vmaxfpCode, pairwise=True)
2193 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2194 4, vmaxfpCode, pairwise=True)
2195
2196 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2197 2, vminfpCode, pairwise=True)
2198 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2199 4, vminfpCode, pairwise=True)
2200
# Float arithmetic. Every snippet reads FpscrExc into a local FPSCR, runs
# binaryOp() with the named helper and VfpRoundNearest, and writes the FPSCR
# back to FpscrExc.
#
# vadd (float): binaryOp with fpAddS. Also reused for the pairwise vpadd.
2201 vaddfpCode = '''
2202 FPSCR fpscr = (FPSCR) FpscrExc;
2203 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2204 true, true, VfpRoundNearest);
2205 FpscrExc = fpscr;
2206 '''
2207 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2208 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2209
2210 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2211 2, vaddfpCode, pairwise=True)
2212 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2213 4, vaddfpCode, pairwise=True)
2214
# vsub (float): binaryOp with fpSubS.
2215 vsubfpCode = '''
2216 FPSCR fpscr = (FPSCR) FpscrExc;
2217 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2218 true, true, VfpRoundNearest);
2219 FpscrExc = fpscr;
2220 '''
2221 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2222 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2223
# vmul (float): binaryOp with fpMulS.
2224 vmulfpCode = '''
2225 FPSCR fpscr = (FPSCR) FpscrExc;
2226 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2227 true, true, VfpRoundNearest);
2228 FpscrExc = fpscr;
2229 '''
2230 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2231 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2232
# vmla (float): two separate binaryOp steps -- multiply into `mid`, then add
# `mid` and destReg. destReg is read; the helper gets a trailing True.
2233 vmlafpCode = '''
2234 FPSCR fpscr = (FPSCR) FpscrExc;
2235 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2236 true, true, VfpRoundNearest);
2237 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2238 true, true, VfpRoundNearest);
2239 FpscrExc = fpscr;
2240 '''
2241 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2242 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2243
# vmls (float): multiply into `mid`, then compute destReg - mid with fpSubS
# (note the operand order: destReg is the first operand of the subtraction).
2244 vmlsfpCode = '''
2245 FPSCR fpscr = (FPSCR) FpscrExc;
2246 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2247 true, true, VfpRoundNearest);
2248 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2249 true, true, VfpRoundNearest);
2250 FpscrExc = fpscr;
2251 '''
2252 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2253 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2254
# Float compares. All five snippets share one shape: binaryOp() is run with a
# comparison helper, destReg becomes -1 when the helper's result equals 0 and
# 0 otherwise, and a result of exactly 2.0 additionally sets fpscr.ioc.
# The helpers (vcgtFunc, vcgeFunc, vacgtFunc, vacgeFunc, vceqFunc) are
# defined outside this excerpt; presumably 0 encodes "condition true" and 2.0
# an invalid-operation case -- confirm against their definitions.
# Every instantiation passes toInt = True.
#
# vcgt (float)
2255 vcgtfpCode = '''
2256 FPSCR fpscr = (FPSCR) FpscrExc;
2257 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2258 true, true, VfpRoundNearest);
2259 destReg = (res == 0) ? -1 : 0;
2260 if (res == 2.0)
2261 fpscr.ioc = 1;
2262 FpscrExc = fpscr;
2263 '''
2264 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2265 2, vcgtfpCode, toInt = True)
2266 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2267 4, vcgtfpCode, toInt = True)
2268
# vcge (float)
2269 vcgefpCode = '''
2270 FPSCR fpscr = (FPSCR) FpscrExc;
2271 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2272 true, true, VfpRoundNearest);
2273 destReg = (res == 0) ? -1 : 0;
2274 if (res == 2.0)
2275 fpscr.ioc = 1;
2276 FpscrExc = fpscr;
2277 '''
2278 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2279 2, vcgefpCode, toInt = True)
2280 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2281 4, vcgefpCode, toInt = True)
2282
# vacgt (float)
2283 vacgtfpCode = '''
2284 FPSCR fpscr = (FPSCR) FpscrExc;
2285 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2286 true, true, VfpRoundNearest);
2287 destReg = (res == 0) ? -1 : 0;
2288 if (res == 2.0)
2289 fpscr.ioc = 1;
2290 FpscrExc = fpscr;
2291 '''
2292 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2293 2, vacgtfpCode, toInt = True)
2294 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2295 4, vacgtfpCode, toInt = True)
2296
# vacge (float)
2297 vacgefpCode = '''
2298 FPSCR fpscr = (FPSCR) FpscrExc;
2299 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2300 true, true, VfpRoundNearest);
2301 destReg = (res == 0) ? -1 : 0;
2302 if (res == 2.0)
2303 fpscr.ioc = 1;
2304 FpscrExc = fpscr;
2305 '''
2306 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2307 2, vacgefpCode, toInt = True)
2308 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2309 4, vacgefpCode, toInt = True)
2310
# vceq (float)
2311 vceqfpCode = '''
2312 FPSCR fpscr = (FPSCR) FpscrExc;
2313 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2314 true, true, VfpRoundNearest);
2315 destReg = (res == 0) ? -1 : 0;
2316 if (res == 2.0)
2317 fpscr.ioc = 1;
2318 FpscrExc = fpscr;
2319 '''
2320 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2321 2, vceqfpCode, toInt = True)
2322 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2323 4, vceqfpCode, toInt = True)
2324
# vrecps: binaryOp with the fpRecpsS helper (defined outside this excerpt).
2325 vrecpsCode = '''
2326 FPSCR fpscr = (FPSCR) FpscrExc;
2327 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2328 true, true, VfpRoundNearest);
2329 FpscrExc = fpscr;
2330 '''
2331 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2332 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2333
# vrsqrts: binaryOp with the fpRSqrtsS helper (defined outside this excerpt).
2334 vrsqrtsCode = '''
2335 FPSCR fpscr = (FPSCR) FpscrExc;
2336 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2337 true, true, VfpRoundNearest);
2338 FpscrExc = fpscr;
2339 '''
2340 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2341 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2342
# vabd (float): subtract through binaryOp with fpSubS, then take fabs() of
# the difference.
2343 vabdfpCode = '''
2344 FPSCR fpscr = (FPSCR) FpscrExc;
2345 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2346 true, true, VfpRoundNearest);
2347 destReg = fabs(mid);
2348 FpscrExc = fpscr;
2349 '''
2350 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2351 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2352
# Second set of instantiations for the multiply family, using the
# twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst helpers and class
# names carrying an extra "s" (VmlasD, Vmulls, ...). No new code strings are
# bound here; every call reuses a snippet bound for the three-register forms.
# NOTE(review): the integer vmla calls use unsignedTypes while the integer
# vmls and vmul calls use allTypes -- confirm that this asymmetry is intended.
2353 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2354 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2355 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2356 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2357 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2358
2359 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2360 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2361 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2362 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2363 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2364
2365 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2366 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2367 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2368 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2369 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2370
2371 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2372 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2373 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2374 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2375 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2376 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2377 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2378 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2379 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2380
# vshr (immediate): shift srcElem1 right by imm. When imm is at least the
# element width the shift is not performed; the result is -1 if srcElem1 is
# negative (per ltz) and 0 otherwise.
2381 vshrCode = '''
2382 if (imm >= sizeof(srcElem1) * 8) {
2383 if (ltz(srcElem1))
2384 destElem = -1;
2385 else
2386 destElem = 0;
2387 } else {
2388 destElem = srcElem1 >> imm;
2389 }
2390 '''
2391 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2392 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2393
2394 vsraCode = '''
2395 Element mid;;
2396 if (imm >= sizeof(srcElem1) * 8) {
2397 mid = ltz(srcElem1) ? -1 : 0;
2398 } else {
2399 mid = srcElem1 >> imm;
2400 if (ltz(srcElem1) && !ltz(mid)) {
2401 mid |= -(mid & ((Element)1 <<
2402 (sizeof(Element) * 8 - 1 - imm)));
2403 }
2404 }
2405 destElem += mid;
2406 '''
2407 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2408 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2409
# vrshr (immediate, rounding): for a non-zero imm no larger than the element
# width, the result is srcElem1 shifted right by imm plus the rounding bit
# (bit imm - 1 of srcElem1). The shift is performed as (imm - 1) followed by
# 1 -- presumably so that imm == element width never becomes a single shift
# by the full width. imm larger than the element width yields 0; imm == 0
# copies srcElem1.
2410 vrshrCode = '''
2411 if (imm > sizeof(srcElem1) * 8) {
2412 destElem = 0;
2413 } else if (imm) {
2414 Element rBit = bits(srcElem1, imm - 1);
2415 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2416 } else {
2417 destElem = srcElem1;
2418 }
2419 '''
2420 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2421 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2422
# vrsra (immediate, rounding, accumulate): the same three cases as vrshr,
# with each value added to destElem instead of assigned (the out-of-range
# case adds 0, leaving destElem unchanged).
2423 vrsraCode = '''
2424 if (imm > sizeof(srcElem1) * 8) {
2425 destElem += 0;
2426 } else if (imm) {
2427 Element rBit = bits(srcElem1, imm - 1);
2428 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2429 } else {
2430 destElem += srcElem1;
2431 }
2432 '''
2433 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2434 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2435
# vsri (immediate, shift right and insert): srcElem1 >> imm is merged into
# destElem, keeping the destElem bits selected by
# ~mask(element width - imm). When imm is at least the element width,
# destElem is left unchanged (written as a self-assignment).
2436 vsriCode = '''
2437 if (imm >= sizeof(Element) * 8)
2438 destElem = destElem;
2439 else
2440 destElem = (srcElem1 >> imm) |
2441 (destElem & ~mask(sizeof(Element) * 8 - imm));
2442 '''
2443 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2444 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2445
# vshl (immediate): srcElem1 << imm. When imm is at least the element width
# the shift is split into (width - 1) followed by 1.
# NOTE: this rebinds the name vshlCode, which earlier in the file held the
# register-shift snippet; the calls below use the immediate form bound here.
2446 vshlCode = '''
2447 if (imm >= sizeof(Element) * 8)
2448 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2449 else
2450 destElem = srcElem1 << imm;
2451 '''
2452 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2453 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2454
# vsli (immediate, shift left and insert): srcElem1 << imm is merged into
# destElem, keeping the destElem bits selected by mask(imm). When imm is at
# least the element width, destElem is left unchanged.
2455 vsliCode = '''
2456 if (imm >= sizeof(Element) * 8)
2457 destElem = destElem;
2458 else
2459 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2460 '''
2461 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2462 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2463
# vqshl (immediate, signed saturating): shift srcElem1 left by imm.
#   - imm >= element width: any non-zero srcElem1 saturates;
#   - 0 < imm < width: saturates when the top (imm + 1) bits of srcElem1 are
#     neither all zero nor all one;
#   - imm == 0: srcElem1 is copied unchanged.
# The saturated value is 1 << (width - 1), complemented when srcElem1 is
# positive; fpscr.qc is set whenever saturation happens.
2464 vqshlCode = '''
2465 FPSCR fpscr = (FPSCR) FpscrQc;
2466 if (imm >= sizeof(Element) * 8) {
2467 if (srcElem1 != 0) {
2468 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2469 if (srcElem1 > 0)
2470 destElem = ~destElem;
2471 fpscr.qc = 1;
2472 } else {
2473 destElem = 0;
2474 }
2475 } else if (imm) {
2476 destElem = (srcElem1 << imm);
2477 uint64_t topBits = bits((uint64_t)srcElem1,
2478 sizeof(Element) * 8 - 1,
2479 sizeof(Element) * 8 - 1 - imm);
2480 if (topBits != 0 && topBits != mask(imm + 1)) {
2481 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2482 if (srcElem1 > 0)
2483 destElem = ~destElem;
2484 fpscr.qc = 1;
2485 }
2486 } else {
2487 destElem = srcElem1;
2488 }
2489 FpscrQc = fpscr;
2490 '''
2491 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2492 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2493
# vqshlu (immediate, instantiated for unsignedTypes): shift srcElem1 left by
# imm, saturating to mask(element width) and setting fpscr.qc when any of
# the top imm bits of srcElem1 is set (or when imm >= element width and
# srcElem1 is non-zero). imm == 0 copies srcElem1 unchanged.
2494 vqshluCode = '''
2495 FPSCR fpscr = (FPSCR) FpscrQc;
2496 if (imm >= sizeof(Element) * 8) {
2497 if (srcElem1 != 0) {
2498 destElem = mask(sizeof(Element) * 8);
2499 fpscr.qc = 1;
2500 } else {
2501 destElem = 0;
2502 }
2503 } else if (imm) {
2504 destElem = (srcElem1 << imm);
2505 uint64_t topBits = bits((uint64_t)srcElem1,
2506 sizeof(Element) * 8 - 1,
2507 sizeof(Element) * 8 - imm);
2508 if (topBits != 0) {
2509 destElem = mask(sizeof(Element) * 8);
2510 fpscr.qc = 1;
2511 }
2512 } else {
2513 destElem = srcElem1;
2514 }
2515 FpscrQc = fpscr;
2516 '''
2517 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2518 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2519
# Saturating shift left by immediate with a signed source and an unsigned
# result range. It is registered under the mnemonic "vqshlus" over
# signedTypes.
# FpscrQc is read into a local FPSCR and written back at the end.
# In every imm range a negative source yields 0 with qc set.
#  * imm >= width: a positive source saturates to mask(width) with qc set.
#    Zero stays 0 without qc.
#  * 0 < imm < width: a non-negative source is shifted. If any of its top
#    imm bits is set, the result saturates to mask(width) with qc set.
#  * imm == 0: a non-negative source is copied.
2520 vqshlusCode = '''
2521 FPSCR fpscr = (FPSCR) FpscrQc;
2522 if (imm >= sizeof(Element) * 8) {
2523 if (srcElem1 < 0) {
2524 destElem = 0;
2525 fpscr.qc = 1;
2526 } else if (srcElem1 > 0) {
2527 destElem = mask(sizeof(Element) * 8);
2528 fpscr.qc = 1;
2529 } else {
2530 destElem = 0;
2531 }
2532 } else if (imm) {
2533 destElem = (srcElem1 << imm);
2534 uint64_t topBits = bits((uint64_t)srcElem1,
2535 sizeof(Element) * 8 - 1,
2536 sizeof(Element) * 8 - imm);
2537 if (srcElem1 < 0) {
2538 destElem = 0;
2539 fpscr.qc = 1;
2540 } else if (topBits != 0) {
2541 destElem = mask(sizeof(Element) * 8);
2542 fpscr.qc = 1;
2543 }
2544 } else {
2545 if (srcElem1 < 0) {
2546 fpscr.qc = 1;
2547 destElem = 0;
2548 } else {
2549 destElem = srcElem1;
2550 }
2551 }
2552 FpscrQc = fpscr;
2553 '''
2554 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2555 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2556
# VSHRN (shift right and narrow).
# The result is 0 when imm is at least the bit width of srcElem1, otherwise
# srcElem1 >> imm. The limit is taken from sizeof(srcElem1), not from
# sizeof(Element). NOTE(review): the narrowing itself presumably happens in
# the assignment to a narrower destElem -- confirm in twoRegNarrowShiftInst.
2557 vshrnCode = '''
2558 if (imm >= sizeof(srcElem1) * 8) {
2559 destElem = 0;
2560 } else {
2561 destElem = srcElem1 >> imm;
2562 }
2563 '''
2564 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2565
# VRSHRN (rounding shift right and narrow).
# For non-zero imm the rounding bit is source bit (imm - 1). It is added to
# the source shifted right by imm, done as (imm - 1) followed by 1.
# The out-of-range test here is imm > width, so imm == width still takes
# the rounding path. imm == 0 is a plain copy.
2566 vrshrnCode = '''
2567 if (imm > sizeof(srcElem1) * 8) {
2568 destElem = 0;
2569 } else if (imm) {
2570 Element rBit = bits(srcElem1, imm - 1);
2571 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2572 } else {
2573 destElem = srcElem1;
2574 }
2575 '''
2576 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2577
# VQSHRN, signed saturating shift right and narrow (smallSignedTypes).
# FpscrQc is read into a local FPSCR and written back at the end.
#  * imm > source width: the result is 0. qc is set unless the source is 0
#    or -1.
#  * 0 < imm <= source width: the source is shifted right by imm into a
#    BigElement "mid", whose bit (BigElement width - 1 - imm) is then
#    propagated upward. If mid does not survive a round trip through
#    Element, the result saturates to mask(width - 1), inverted for a
#    negative source, and qc is set. Otherwise mid is stored.
#  * imm == 0: plain copy, with no saturation check.
2578 vqshrnCode = '''
2579 FPSCR fpscr = (FPSCR) FpscrQc;
2580 if (imm > sizeof(srcElem1) * 8) {
2581 if (srcElem1 != 0 && srcElem1 != -1)
2582 fpscr.qc = 1;
2583 destElem = 0;
2584 } else if (imm) {
2585 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2586 mid |= -(mid & ((BigElement)1 <<
2587 (sizeof(BigElement) * 8 - 1 - imm)));
2588 if (mid != (Element)mid) {
2589 destElem = mask(sizeof(Element) * 8 - 1);
2590 if (srcElem1 < 0)
2591 destElem = ~destElem;
2592 fpscr.qc = 1;
2593 } else {
2594 destElem = mid;
2595 }
2596 } else {
2597 destElem = srcElem1;
2598 }
2599 FpscrQc = fpscr;
2600 '''
2601 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2602
# Unsigned saturating shift right and narrow. It is registered with the
# mnemonic "vqshrun" and class NVqshrun over smallUnsignedTypes.
# FpscrQc is read into a local FPSCR and written back at the end.
#  * imm > source width: the result is 0. qc is set for a non-zero source.
#  * 0 < imm <= source width: the shifted value "mid" is stored if it
#    survives a round trip through Element. Otherwise the result saturates
#    to mask(width) and qc is set.
#  * imm == 0: plain copy, with no saturation check.
2603 vqshrunCode = '''
2604 FPSCR fpscr = (FPSCR) FpscrQc;
2605 if (imm > sizeof(srcElem1) * 8) {
2606 if (srcElem1 != 0)
2607 fpscr.qc = 1;
2608 destElem = 0;
2609 } else if (imm) {
2610 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2611 if (mid != (Element)mid) {
2612 destElem = mask(sizeof(Element) * 8);
2613 fpscr.qc = 1;
2614 } else {
2615 destElem = mid;
2616 }
2617 } else {
2618 destElem = srcElem1;
2619 }
2620 FpscrQc = fpscr;
2621 '''
2622 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2623 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2624
# Saturating shift right and narrow from a signed source to an unsigned
# result range. It is registered with the mnemonic "vqshrun" and class
# NVqshruns over smallSignedTypes.
# FpscrQc is read into a local FPSCR and written back at the end.
#  * imm > source width: the result is 0. qc is set for a non-zero source.
#  * 0 < imm <= source width: if any bit of the shifted value above the
#    Element width is set, the result saturates to 0 for a negative source
#    or mask(width) otherwise, and qc is set. Otherwise the shifted value
#    is stored.
#  * imm == 0: plain copy.
2625 vqshrunsCode = '''
2626 FPSCR fpscr = (FPSCR) FpscrQc;
2627 if (imm > sizeof(srcElem1) * 8) {
2628 if (srcElem1 != 0)
2629 fpscr.qc = 1;
2630 destElem = 0;
2631 } else if (imm) {
2632 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2633 if (bits(mid, sizeof(BigElement) * 8 - 1,
2634 sizeof(Element) * 8) != 0) {
2635 if (srcElem1 < 0) {
2636 destElem = 0;
2637 } else {
2638 destElem = mask(sizeof(Element) * 8);
2639 }
2640 fpscr.qc = 1;
2641 } else {
2642 destElem = mid;
2643 }
2644 } else {
2645 destElem = srcElem1;
2646 }
2647 FpscrQc = fpscr;
2648 '''
2649 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2650 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2651
# VQRSHRN, signed saturating rounding shift right and narrow
# (smallSignedTypes).
# FpscrQc is read into a local FPSCR and written back at the end.
#  * imm > source width: the result is 0. qc is set unless the source is 0
#    or -1.
#  * 0 < imm <= source width: the source is shifted by imm - 1 and its low
#    bit is kept as the rounding bit. After one more shift and upward
#    propagation of bit (BigElement width - 1 - imm), the rounding bit is
#    added. A value that does not round-trip through Element saturates to
#    mask(width - 1), inverted for a negative source, with qc set.
#  * imm == 0: the same saturation check is applied to the unshifted source.
2652 vqrshrnCode = '''
2653 FPSCR fpscr = (FPSCR) FpscrQc;
2654 if (imm > sizeof(srcElem1) * 8) {
2655 if (srcElem1 != 0 && srcElem1 != -1)
2656 fpscr.qc = 1;
2657 destElem = 0;
2658 } else if (imm) {
2659 BigElement mid = (srcElem1 >> (imm - 1));
2660 uint64_t rBit = mid & 0x1;
2661 mid >>= 1;
2662 mid |= -(mid & ((BigElement)1 <<
2663 (sizeof(BigElement) * 8 - 1 - imm)));
2664 mid += rBit;
2665 if (mid != (Element)mid) {
2666 destElem = mask(sizeof(Element) * 8 - 1);
2667 if (srcElem1 < 0)
2668 destElem = ~destElem;
2669 fpscr.qc = 1;
2670 } else {
2671 destElem = mid;
2672 }
2673 } else {
2674 if (srcElem1 != (Element)srcElem1) {
2675 destElem = mask(sizeof(Element) * 8 - 1);
2676 if (srcElem1 < 0)
2677 destElem = ~destElem;
2678 fpscr.qc = 1;
2679 } else {
2680 destElem = srcElem1;
2681 }
2682 }
2683 FpscrQc = fpscr;
2684 '''
2685 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2686 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2687
2688 vqrshrunCode = '''
2689 FPSCR fpscr = (FPSCR) FpscrQc;
2690 if (imm > sizeof(srcElem1) * 8) {
2691 if (srcElem1 != 0)
2692 fpscr.qc = 1;
2693 destElem = 0;
2694 } else if (imm) {
2695 BigElement mid = (srcElem1 >> (imm - 1));
2696 uint64_t rBit = mid & 0x1;
2697 mid >>= 1;
2698 mid += rBit;
2699 if (mid != (Element)mid) {
2700 destElem = mask(sizeof(Element) * 8);
2701 fpscr.qc = 1;
2702 } else {
2703 destElem = mid;
2704 }
2705 } else {
2706 if (srcElem1 != (Element)srcElem1) {
2707 destElem = mask(sizeof(Element) * 8 - 1);
2708 fpscr.qc = 1;
2709 } else {
2710 destElem = srcElem1;
2711 }
2712 }
2713 FpscrQc = fpscr;
2714 '''
2715 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2716 "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2717
# Saturating rounding shift right and narrow from a signed source to an
# unsigned result range. It is registered with the mnemonic "vqrshrun" and
# class NVqrshruns over smallSignedTypes.
# FpscrQc is read into a local FPSCR and written back at the end.
#  * imm > source width: the result is 0. qc is set for a non-zero source.
#  * 0 < imm <= source width: shift by imm - 1, keep the low bit as the
#    rounding bit, shift once more, propagate bit
#    (BigElement width - 1 - imm) upward, then add the rounding bit. If any
#    bit above the Element width is set, the result saturates to 0 for a
#    negative source or mask(width) otherwise, with qc set.
#  * imm == 0: a negative source gives 0 with qc set. Otherwise plain copy.
2718 vqrshrunsCode = '''
2719 FPSCR fpscr = (FPSCR) FpscrQc;
2720 if (imm > sizeof(srcElem1) * 8) {
2721 if (srcElem1 != 0)
2722 fpscr.qc = 1;
2723 destElem = 0;
2724 } else if (imm) {
2725 BigElement mid = (srcElem1 >> (imm - 1));
2726 uint64_t rBit = mid & 0x1;
2727 mid >>= 1;
2728 mid |= -(mid & ((BigElement)1 <<
2729 (sizeof(BigElement) * 8 - 1 - imm)));
2730 mid += rBit;
2731 if (bits(mid, sizeof(BigElement) * 8 - 1,
2732 sizeof(Element) * 8) != 0) {
2733 if (srcElem1 < 0) {
2734 destElem = 0;
2735 } else {
2736 destElem = mask(sizeof(Element) * 8);
2737 }
2738 fpscr.qc = 1;
2739 } else {
2740 destElem = mid;
2741 }
2742 } else {
2743 if (srcElem1 < 0) {
2744 fpscr.qc = 1;
2745 destElem = 0;
2746 } else {
2747 destElem = srcElem1;
2748 }
2749 }
2750 FpscrQc = fpscr;
2751 '''
2752 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2753 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2754
# VSHLL (shift left long).
# The source element is cast to BigElement before shifting. The result is
# 0 when imm is at least the bit width of destElem.
2755 vshllCode = '''
2756 if (imm >= sizeof(destElem) * 8) {
2757 destElem = 0;
2758 } else {
2759 destElem = (BigElement)srcElem1 << imm;
2760 }
2761 '''
2762 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2763
# VMOVL: a plain element copy, generated through the same long-shift helper
# as VSHLL but with the SimdMiscOp class. NOTE(review): the widening
# presumably comes from destElem being the wider type -- confirm in
# twoRegLongShiftInst.
2764 vmovlCode = '''
2765 destElem = srcElem1;
2766 '''
2767 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2768
# VCVT, float to unsigned fixed-point.
# Steps, in order: copy FpscrExc to a local FPSCR; set idc when
# flushToZero(srcElem1) returns true; call prepFpState(VfpRoundNearest);
# convert with vfpFpSToFixed(srcElem1, false, false, imm); call finishVfp;
# write the FPSCR back to FpscrExc.
# The empty asm statements with "m" constraints force srcElem1 / destReg
# through memory -- presumably to stop the compiler moving the conversion
# across the FP-state changes.
2769 vcvt2ufxCode = '''
2770 FPSCR fpscr = (FPSCR) FpscrExc;
2771 if (flushToZero(srcElem1))
2772 fpscr.idc = 1;
2773 VfpSavedState state = prepFpState(VfpRoundNearest);
2774 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2775 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2776 __asm__ __volatile__("" :: "m" (destReg));
2777 finishVfp(fpscr, state, true);
2778 FpscrExc = fpscr;
2779 '''
2780 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2781 2, vcvt2ufxCode, toInt = True)
2782 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2783 4, vcvt2ufxCode, toInt = True)
2784
# VCVT, float to signed fixed-point. It is identical to the unsigned
# variant except that the second argument of vfpFpSToFixed is true.
2785 vcvt2sfxCode = '''
2786 FPSCR fpscr = (FPSCR) FpscrExc;
2787 if (flushToZero(srcElem1))
2788 fpscr.idc = 1;
2789 VfpSavedState state = prepFpState(VfpRoundNearest);
2790 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2791 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2792 __asm__ __volatile__("" :: "m" (destReg));
2793 finishVfp(fpscr, state, true);
2794 FpscrExc = fpscr;
2795 '''
2796 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2797 2, vcvt2sfxCode, toInt = True)
2798 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2799 4, vcvt2sfxCode, toInt = True)
2800
# VCVT, unsigned fixed-point to float.
# Converts srcReg1 with vfpUFixedToFpS(true, true, srcReg1, false, imm)
# between prepFpState(VfpRoundNearest) and finishVfp. The FPSCR is read
# from and written back to FpscrExc. The generator is told the source is
# an integer via fromInt = True.
2801 vcvtu2fpCode = '''
2802 FPSCR fpscr = (FPSCR) FpscrExc;
2803 VfpSavedState state = prepFpState(VfpRoundNearest);
2804 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2805 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2806 __asm__ __volatile__("" :: "m" (destElem));
2807 finishVfp(fpscr, state, true);
2808 FpscrExc = fpscr;
2809 '''
2810 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2811 2, vcvtu2fpCode, fromInt = True)
2812 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2813 4, vcvtu2fpCode, fromInt = True)
2814
# VCVT, signed fixed-point to float. It has the same shape as the unsigned
# variant but calls vfpSFixedToFpS.
2815 vcvts2fpCode = '''
2816 FPSCR fpscr = (FPSCR) FpscrExc;
2817 VfpSavedState state = prepFpState(VfpRoundNearest);
2818 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2819 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2820 __asm__ __volatile__("" :: "m" (destElem));
2821 finishVfp(fpscr, state, true);
2822 FpscrExc = fpscr;
2823 '''
2824 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2825 2, vcvts2fpCode, fromInt = True)
2826 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2827 4, vcvts2fpCode, fromInt = True)
2828
# VCVT, single precision to half precision (narrowing).
# The source bits are reinterpreted as a float with bitsToFp. idc is set
# when flushToZero(srcFp1) returns true. The value is converted with
# vcvtFpSFpH using VfpRoundNearest and fpscr.ahp, between prepFpState and
# finishVfp. The FPSCR is read from and written back to FpscrExc.
2829 vcvts2hCode = '''
2830 FPSCR fpscr = (FPSCR) FpscrExc;
2831 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2832 if (flushToZero(srcFp1))
2833 fpscr.idc = 1;
2834 VfpSavedState state = prepFpState(VfpRoundNearest);
2835 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2836 : "m" (srcFp1), "m" (destElem));
2837 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2838 fpscr.ahp, srcFp1);
2839 __asm__ __volatile__("" :: "m" (destElem));
2840 finishVfp(fpscr, state, true);
2841 FpscrExc = fpscr;
2842 '''
2843 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2844
# VCVT, half precision to single precision (lengthening).
# Converts with vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1) and stores the
# result's bit pattern via fpToBits.
2845 vcvth2sCode = '''
2846 FPSCR fpscr = (FPSCR) FpscrExc;
2847 VfpSavedState state = prepFpState(VfpRoundNearest);
2848 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2849 : "m" (srcElem1), "m" (destElem));
2850 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2851 __asm__ __volatile__("" :: "m" (destElem));
2852 finishVfp(fpscr, state, true);
2853 FpscrExc = fpscr;
2854 '''
2855 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2856
# VRSQRTE, integer form: delegates to unsignedRSqrtEstimate on uint32_t
# elements.
2857 vrsqrteCode = '''
2858 destElem = unsignedRSqrtEstimate(srcElem1);
2859 '''
2860 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2861 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2862
# VRSQRTE, floating-point form: sets idc when flushToZero(srcReg1) returns
# true, then delegates to fprSqrtEstimate. The FPSCR is read from and
# written back to FpscrExc.
2863 vrsqrtefpCode = '''
2864 FPSCR fpscr = (FPSCR) FpscrExc;
2865 if (flushToZero(srcReg1))
2866 fpscr.idc = 1;
2867 destReg = fprSqrtEstimate(fpscr, srcReg1);
2868 FpscrExc = fpscr;
2869 '''
2870 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2871 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2872
# VRECPE, integer form: delegates to unsignedRecipEstimate on uint32_t
# elements. It is registered with the SimdMultAccOp class.
2873 vrecpeCode = '''
2874 destElem = unsignedRecipEstimate(srcElem1);
2875 '''
2876 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2877 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2878
# VRECPE, floating-point form: sets idc when flushToZero(srcReg1) returns
# true, then delegates to fpRecipEstimate. The FPSCR is read from and
# written back to FpscrExc.
2879 vrecpefpCode = '''
2880 FPSCR fpscr = (FPSCR) FpscrExc;
2881 if (flushToZero(srcReg1))
2882 fpscr.idc = 1;
2883 destReg = fpRecipEstimate(fpscr, srcReg1);
2884 FpscrExc = fpscr;
2885 '''
2886 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2887 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2888
# VREV16 / VREV32 / VREV64: reverse the elements inside each 2-, 4- or
# 8-byte group.
# All three snippets copy the element unchanged and redirect it by setting
# j = i ^ (groupSize - 1), where groupSize is the group size in bytes
# (1 << 1, 1 << 2, 1 << 3) divided by sizeof(Element). NOTE(review): j is
# presumably the destination element index used by twoRegMiscInst --
# confirm there.
2889 vrev16Code = '''
2890 destElem = srcElem1;
2891 unsigned groupSize = ((1 << 1) / sizeof(Element));
2892 unsigned reverseMask = (groupSize - 1);
2893 j = i ^ reverseMask;
2894 '''
2895 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2896 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# 4-byte groups; instantiated for 8- and 16-bit elements.
2897 vrev32Code = '''
2898 destElem = srcElem1;
2899 unsigned groupSize = ((1 << 2) / sizeof(Element));
2900 unsigned reverseMask = (groupSize - 1);
2901 j = i ^ reverseMask;
2902 '''
2903 twoRegMiscInst("vrev32", "NVrev32D",
2904 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2905 twoRegMiscInst("vrev32", "NVrev32Q",
2906 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# 8-byte groups; instantiated for smallUnsignedTypes.
2907 vrev64Code = '''
2908 destElem = srcElem1;
2909 unsigned groupSize = ((1 << 3) / sizeof(Element));
2910 unsigned reverseMask = (groupSize - 1);
2911 j = i ^ reverseMask;
2912 '''
2913 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2914 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2915
# VPADDL (pairwise add long): both sources are cast to BigElement before
# the addition, so the sum is computed at the wider width.
2916 vpaddlCode = '''
2917 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2918 '''
2919 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2920 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2921
# VPADAL (pairwise add and accumulate long): the same widened sum, added
# onto the existing destElem. NOTE(review): the trailing True presumably
# marks destElem as an input -- confirm in twoRegCondenseInst.
2922 vpadalCode = '''
2923 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2924 '''
2925 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2926 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2927
# VCLS (count leading sign bits).
# The sign bit is shifted out first. The loop then counts how many
# following bits keep the element on the same side of zero, capped at
# width - 1 iterations.
2928 vclsCode = '''
2929 unsigned count = 0;
2930 if (srcElem1 < 0) {
2931 srcElem1 <<= 1;
2932 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2933 count++;
2934 srcElem1 <<= 1;
2935 }
2936 } else {
2937 srcElem1 <<= 1;
2938 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2939 count++;
2940 srcElem1 <<= 1;
2941 }
2942 }
2943 destElem = count;
2944 '''
2945 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2946 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2947
# VCLZ (count leading zeros).
# Shifts left while the element is non-negative (top bit clear), up to
# width iterations. It is instantiated on signedTypes, which is what makes
# the ">= 0" test a top-bit test.
2948 vclzCode = '''
2949 unsigned count = 0;
2950 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2951 count++;
2952 srcElem1 <<= 1;
2953 }
2954 destElem = count;
2955 '''
2956 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2957 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2958
# VCNT (population count): adds the low bit to the count and shifts right
# until the element is zero or width iterations have run.
2959 vcntCode = '''
2960 unsigned count = 0;
2961 while (srcElem1 && count < sizeof(Element) * 8) {
2962 count += srcElem1 & 0x1;
2963 srcElem1 >>= 1;
2964 }
2965 destElem = count;
2966 '''
2967
2968 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2969 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2970
# VMVN (register form): bitwise NOT of each element, instantiated only for
# uint64_t. The name vmvnCode is assigned again later in this file for the
# immediate form, so this value is only valid for the two calls below.
2971 vmvnCode = '''
2972 destElem = ~srcElem1;
2973 '''
2974 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2975 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2976
# VQABS (saturating absolute value).
# If the source equals 1 << (width - 1), the most negative value, qc is set
# and the result is ~srcElem1. Other negative values are negated and
# non-negative values are copied. FpscrQc is read into a local FPSCR and
# written back at the end.
2977 vqabsCode = '''
2978 FPSCR fpscr = (FPSCR) FpscrQc;
2979 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2980 fpscr.qc = 1;
2981 destElem = ~srcElem1;
2982 } else if (srcElem1 < 0) {
2983 destElem = -srcElem1;
2984 } else {
2985 destElem = srcElem1;
2986 }
2987 FpscrQc = fpscr;
2988 '''
2989 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2990 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2991
# VQNEG (saturating negate): the same special case for 1 << (width - 1).
# Every other value is simply negated.
2992 vqnegCode = '''
2993 FPSCR fpscr = (FPSCR) FpscrQc;
2994 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2995 fpscr.qc = 1;
2996 destElem = ~srcElem1;
2997 } else {
2998 destElem = -srcElem1;
2999 }
3000 FpscrQc = fpscr;
3001 '''
3002 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3003 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3004
# VABS, integer form: negates negative elements and copies the rest. There
# is no saturation, so the most negative value goes through the plain
# negation.
3005 vabsCode = '''
3006 if (srcElem1 < 0) {
3007 destElem = -srcElem1;
3008 } else {
3009 destElem = srcElem1;
3010 }
3011 '''
3012
3013 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3014 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# VABS, floating-point form: views the float through a uint32_t/float union
# and clears the top bit by ANDing with mask(width - 1). It does not touch
# the FPSCR.
3015 vabsfpCode = '''
3016 union
3017 {
3018 uint32_t i;
3019 float f;
3020 } cStruct;
3021 cStruct.f = srcReg1;
3022 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3023 destReg = cStruct.f;
3024 '''
3025 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3026 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3027
# VNEG, integer form: plain two's-complement negation of each element.
3028 vnegCode = '''
3029 destElem = -srcElem1;
3030 '''
3031 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3032 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# VNEG, floating-point form: negates the register value directly.
3033 vnegfpCode = '''
3034 destReg = -srcReg1;
3035 '''
3036 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3037 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3038
# Compare-against-zero family: VCGT, VCGE, VCEQ, VCLE, VCLT.
# Integer forms: the result is mask(width) (all ones) when the signed
# element satisfies the comparison with 0, else 0.
# Floating-point forms: the comparison runs through binaryOp with the
# matching vc??Func and a (FloatReg)0.0 operand. The code treats a result
# of 0 as "true" (destReg = -1) and anything else as "false" (destReg = 0).
# A result of 2.0 also sets fpscr.ioc -- presumably the helper's encoding
# for an invalid (NaN) operand; confirm against the vc??Func definitions.
# The FPSCR is read from and written back to FpscrExc.

# VCGT: element > 0.
3039 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3040 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3041 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3042 vcgtfpCode = '''
3043 FPSCR fpscr = (FPSCR) FpscrExc;
3044 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3045 true, true, VfpRoundNearest);
3046 destReg = (res == 0) ? -1 : 0;
3047 if (res == 2.0)
3048 fpscr.ioc = 1;
3049 FpscrExc = fpscr;
3050 '''
3051 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3052 2, vcgtfpCode, toInt = True)
3053 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3054 4, vcgtfpCode, toInt = True)
3055
# VCGE: element >= 0.
3056 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3057 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3058 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3059 vcgefpCode = '''
3060 FPSCR fpscr = (FPSCR) FpscrExc;
3061 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3062 true, true, VfpRoundNearest);
3063 destReg = (res == 0) ? -1 : 0;
3064 if (res == 2.0)
3065 fpscr.ioc = 1;
3066 FpscrExc = fpscr;
3067 '''
3068 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3069 2, vcgefpCode, toInt = True)
3070 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3071 4, vcgefpCode, toInt = True)
3072
# VCEQ: element == 0.
3073 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3074 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3075 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3076 vceqfpCode = '''
3077 FPSCR fpscr = (FPSCR) FpscrExc;
3078 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3079 true, true, VfpRoundNearest);
3080 destReg = (res == 0) ? -1 : 0;
3081 if (res == 2.0)
3082 fpscr.ioc = 1;
3083 FpscrExc = fpscr;
3084 '''
3085 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3086 2, vceqfpCode, toInt = True)
3087 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3088 4, vceqfpCode, toInt = True)
3089
# VCLE: element <= 0.
3090 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3091 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3092 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3093 vclefpCode = '''
3094 FPSCR fpscr = (FPSCR) FpscrExc;
3095 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3096 true, true, VfpRoundNearest);
3097 destReg = (res == 0) ? -1 : 0;
3098 if (res == 2.0)
3099 fpscr.ioc = 1;
3100 FpscrExc = fpscr;
3101 '''
3102 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3103 2, vclefpCode, toInt = True)
3104 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3105 4, vclefpCode, toInt = True)
3106
# VCLT: element < 0.
3107 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3108 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3109 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3110 vcltfpCode = '''
3111 FPSCR fpscr = (FPSCR) FpscrExc;
3112 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3113 true, true, VfpRoundNearest);
3114 destReg = (res == 0) ? -1 : 0;
3115 if (res == 2.0)
3116 fpscr.ioc = 1;
3117 FpscrExc = fpscr;
3118 '''
3119 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3120 2, vcltfpCode, toInt = True)
3121 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3122 4, vcltfpCode, toInt = True)
3123
# VSWP: exchanges srcReg1 and destReg one register word at a time through
# a temporary, for r in [0, rCount). Both operands are modified, which is
# why the "scramble" generator is used. NOTE(review): twoRegMiscScramble
# presumably writes both registers back -- confirm there.
3124 vswpCode = '''
3125 FloatRegBits mid;
3126 for (unsigned r = 0; r < rCount; r++) {
3127 mid = srcReg1.regs[r];
3128 srcReg1.regs[r] = destReg.regs[r];
3129 destReg.regs[r] = mid;
3130 }
3131 '''
3132 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3133 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3134
# VTRN (transpose): for every even index i, swaps srcReg1.elements[i] with
# destReg.elements[i + 1]. The even elements of destReg and the odd
# elements of srcReg1 are left in place.
3135 vtrnCode = '''
3136 Element mid;
3137 for (unsigned i = 0; i < eCount; i += 2) {
3138 mid = srcReg1.elements[i];
3139 srcReg1.elements[i] = destReg.elements[i + 1];
3140 destReg.elements[i + 1] = mid;
3141 }
3142 '''
3143 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3144 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3145
# VUZP (unzip / de-interleave).
# "mid" holds a copy of the original srcReg1. After the two loops:
#   destReg = even elements of old destReg, then even elements of old srcReg1
#   srcReg1 = odd elements of old destReg, then odd elements of old srcReg1
# The first loop compacts destReg in place. That is safe because index 2*i
# is never below i, so no element is read after being overwritten. The
# upper half of destReg is filled only in the second loop, after all reads
# of the old destReg are done.
3146 vuzpCode = '''
3147 Element mid[eCount];
3148 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3149 for (unsigned i = 0; i < eCount / 2; i++) {
3150 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3151 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3152 destReg.elements[i] = destReg.elements[2 * i];
3153 }
3154 for (unsigned i = 0; i < eCount / 2; i++) {
3155 destReg.elements[eCount / 2 + i] = mid[2 * i];
3156 }
3157 '''
3158 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3159 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3160
3161 vzipCode = '''
3162 Element mid[eCount];
3163 memcpy(&mid, &destReg, sizeof(destReg));
3164 for (unsigned i = 0; i < eCount / 2; i++) {
3165 destReg.elements[2 * i] = mid[i];
3166 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3167 }
3168 for (int i = 0; i < eCount / 2; i++) {
3169 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3170 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3171 }
3172 '''
3173 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3174 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3175
# VMOVN: plain element copy through the narrowing generator. NOTE(review):
# the truncation presumably comes from destElem being the narrower type --
# confirm in twoRegNarrowMiscInst.
3176 vmovnCode = 'destElem = srcElem1;'
3177 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3178
# VDUP (from a scalar element): plain element copy. NOTE(review): selecting
# the scalar and replicating it is presumably done by twoRegMiscScInst --
# confirm there.
3179 vdupCode = 'destElem = srcElem1;'
3180 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3181 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3182
# Generate the VDUP form that replicates a general-purpose register.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name, also used as "class_name" in the
#             per-type substitution below
#   opClass - value stored under "op_class"
#   types   - element types; one NeonExecDeclare substitution is emitted
#             for each
#   rCount  - number of destination register words written back, also
#             stored under "r_count"
# Appends to the global header_output and exec_output; returns nothing.
3183 def vdupGprInst(name, Name, opClass, types, rCount):
3184 global header_output, exec_output
# Element loop: every element of destReg receives Op1 cast to Element and
# converted with htog.
3185 eWalkCode = '''
3186 RegVect destReg;
3187 for (unsigned i = 0; i < eCount; i++) {
3188 destReg.elements[i] = htog((Element)Op1);
3189 }
3190 '''
# One write-back statement per destination register word.
3191 for reg in range(rCount):
3192 eWalkCode += '''
3193 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3194 ''' % { "reg" : reg }
3195 iop = InstObjParams(name, Name,
3196 "RegRegOp",
3197 { "code": eWalkCode,
3198 "r_count": rCount,
3199 "predicate_test": predicateTest,
3200 "op_class": opClass }, [])
3201 header_output += NeonRegRegOpDeclare.subst(iop)
3202 exec_output += NeonEqualRegExecute.subst(iop)
# Emit one NeonExecDeclare substitution per requested element type.
3203 for type in types:
3204 substDict = { "targs" : type,
3205 "class_name" : Name }
3206 exec_output += NeonExecDeclare.subst(substDict)
3207 vdupGprInst("vdup", "NVdupDGpr", "SimdAluOp", smallUnsignedTypes, 2)
3208 vdupGprInst("vdup", "NVdupQGpr", "SimdAluOp", smallUnsignedTypes, 4)
3209
# One-register immediate operations, all instantiated for uint64_t only.
# NOTE(review): the trailing True on VORR / VBIC presumably marks destElem
# as an input, since those snippets read it -- confirm in oneRegImmInst.

# VMOV (immediate): the element becomes the immediate.
3210 vmovCode = 'destElem = imm;'
3211 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3212 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3213
# VORR (immediate): ORs the immediate into the existing element.
3214 vorrCode = 'destElem |= imm;'
3215 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3216 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3217
# VMVN (immediate): the element becomes the inverted immediate. This
# rebinds vmvnCode, which held the register-form snippet earlier in the
# file.
3218 vmvnCode = 'destElem = ~imm;'
3219 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3220 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3221
# VBIC (immediate): clears the immediate's set bits in the existing element.
3222 vbicCode = 'destElem &= ~imm;'
3223 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3224 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3225
# VQMOVN, signed saturating narrow.
# The source is first assigned to destElem. If widening destElem back to
# BigElement does not reproduce the source, qc is set and the result
# becomes mask(width - 1), inverted for a negative source. FpscrQc is read
# into a local FPSCR and written back at the end.
3226 vqmovnCode = '''
3227 FPSCR fpscr = (FPSCR) FpscrQc;
3228 destElem = srcElem1;
3229 if ((BigElement)destElem != srcElem1) {
3230 fpscr.qc = 1;
3231 destElem = mask(sizeof(Element) * 8 - 1);
3232 if (srcElem1 < 0)
3233 destElem = ~destElem;
3234 }
3235 FpscrQc = fpscr;
3236 '''
3237 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3238
# Unsigned saturating narrow. It is registered with the mnemonic "vqmovun"
# and class NVqmovun over smallUnsignedTypes. The round-trip check is the
# same; on overflow the result is mask(width) (all ones).
3239 vqmovunCode = '''
3240 FPSCR fpscr = (FPSCR) FpscrQc;
3241 destElem = srcElem1;
3242 if ((BigElement)destElem != srcElem1) {
3243 fpscr.qc = 1;
3244 destElem = mask(sizeof(Element) * 8);
3245 }
3246 FpscrQc = fpscr;
3247 '''
3248 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3249 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3250
# Saturating narrow from a signed source to an unsigned result range. It
# is registered with the mnemonic "vqmovun" and class NVqmovuns over
# smallSignedTypes. A negative source, or one whose low Element-width bits
# differ from the source, sets qc. The result is mask(width), inverted for
# a negative source.
3251 vqmovunsCode = '''
3252 FPSCR fpscr = (FPSCR) FpscrQc;
3253 destElem = srcElem1;
3254 if (srcElem1 < 0 ||
3255 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3256 fpscr.qc = 1;
3257 destElem = mask(sizeof(Element) * 8);
3258 if (srcElem1 < 0)
3259 destElem = ~destElem;
3260 }
3261 FpscrQc = fpscr;
3262 '''
3263 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3264 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3265
3266 def buildVext(name, Name, opClass, types, rCount, op):
3267 global header_output, exec_output
3268 eWalkCode = '''
3269 RegVect srcReg1, srcReg2, destReg;
3270 '''
3271 for reg in range(rCount):
3272 eWalkCode += simdEnabledCheckCode + '''
3273 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3274 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3275 ''' % { "reg" : reg }
3276 eWalkCode += op
3277 for reg in range(rCount):
3278 eWalkCode += '''
3279 FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3280 ''' % { "reg" : reg }
3281 iop = InstObjParams(name, Name,
3282 "RegRegRegImmOp",
3283 { "code": eWalkCode,
3284 "r_count": rCount,
3285 "predicate_test": predicateTest,
3286 "op_class": opClass }, [])
3287 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3288 exec_output += NeonEqualRegExecute.subst(iop)
3289 for type in types:
3290 substDict = { "targs" : type,
3291 "class_name" : Name }
3292 exec_output += NeonExecDeclare.subst(substDict)
3293
3294 vextCode = '''
3295 for (unsigned i = 0; i < eCount; i++) {
3296 unsigned index = i + imm;
3297 if (index < eCount) {
3298 destReg.elements[i] = srcReg1.elements[index];
3299 } else {
3300 index -= eCount;
3280 assert(index < eCount);
3281 destReg.elements[i] = srcReg2.elements[index];
3301 if (index >= eCount)
3302#if FULL_SYSTEM
3303 fault = new UndefinedInstruction;
3304#else
3305 fault = new UndefinedInstruction(false, mnemonic);
3306#endif
3307 else
3308 destReg.elements[i] = srcReg2.elements[index];
3282 }
3283 }
3284 '''
3285 buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
3286 buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
3287
3288 def buildVtbxl(name, Name, opClass, length, isVtbl):
3289 global header_output, decoder_output, exec_output
3290 code = '''
3291 union
3292 {
3293 uint8_t bytes[32];
3294 FloatRegBits regs[8];
3295 } table;
3296
3297 union
3298 {
3299 uint8_t bytes[8];
3300 FloatRegBits regs[2];
3301 } destReg, srcReg2;
3302
3303 const unsigned length = %(length)d;
3304 const bool isVtbl = %(isVtbl)s;
3305
3306 srcReg2.regs[0] = htog(FpOp2P0.uw);
3307 srcReg2.regs[1] = htog(FpOp2P1.uw);
3308
3309 destReg.regs[0] = htog(FpDestP0.uw);
3310 destReg.regs[1] = htog(FpDestP1.uw);
3311 ''' % { "length" : length, "isVtbl" : isVtbl }
3312 for reg in range(8):
3313 if reg < length * 2:
3314 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3315 { "reg" : reg }
3316 else:
3317 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3318 code += '''
3319 for (unsigned i = 0; i < sizeof(destReg); i++) {
3320 uint8_t index = srcReg2.bytes[i];
3321 if (index < 8 * length) {
3322 destReg.bytes[i] = table.bytes[index];
3323 } else {
3324 if (isVtbl)
3325 destReg.bytes[i] = 0;
3326 // else destReg.bytes[i] unchanged
3327 }
3328 }
3329
3330 FpDestP0.uw = gtoh(destReg.regs[0]);
3331 FpDestP1.uw = gtoh(destReg.regs[1]);
3332 '''
3333 iop = InstObjParams(name, Name,
3334 "RegRegRegOp",
3335 { "code": code,
3336 "predicate_test": predicateTest,
3337 "op_class": opClass }, [])
3338 header_output += RegRegRegOpDeclare.subst(iop)
3339 decoder_output += RegRegRegOpConstructor.subst(iop)
3340 exec_output += PredOpExecute.subst(iop)
3341
3342 buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
3343 buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
3344 buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
3345 buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
3346
3347 buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
3348 buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
3349 buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
3350 buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")
3351}};
3309 }
3310 }
3311 '''
3312 buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
3313 buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
3314
3315 def buildVtbxl(name, Name, opClass, length, isVtbl):
3316 global header_output, decoder_output, exec_output
3317 code = '''
3318 union
3319 {
3320 uint8_t bytes[32];
3321 FloatRegBits regs[8];
3322 } table;
3323
3324 union
3325 {
3326 uint8_t bytes[8];
3327 FloatRegBits regs[2];
3328 } destReg, srcReg2;
3329
3330 const unsigned length = %(length)d;
3331 const bool isVtbl = %(isVtbl)s;
3332
3333 srcReg2.regs[0] = htog(FpOp2P0.uw);
3334 srcReg2.regs[1] = htog(FpOp2P1.uw);
3335
3336 destReg.regs[0] = htog(FpDestP0.uw);
3337 destReg.regs[1] = htog(FpDestP1.uw);
3338 ''' % { "length" : length, "isVtbl" : isVtbl }
3339 for reg in range(8):
3340 if reg < length * 2:
3341 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3342 { "reg" : reg }
3343 else:
3344 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3345 code += '''
3346 for (unsigned i = 0; i < sizeof(destReg); i++) {
3347 uint8_t index = srcReg2.bytes[i];
3348 if (index < 8 * length) {
3349 destReg.bytes[i] = table.bytes[index];
3350 } else {
3351 if (isVtbl)
3352 destReg.bytes[i] = 0;
3353 // else destReg.bytes[i] unchanged
3354 }
3355 }
3356
3357 FpDestP0.uw = gtoh(destReg.regs[0]);
3358 FpDestP1.uw = gtoh(destReg.regs[1]);
3359 '''
3360 iop = InstObjParams(name, Name,
3361 "RegRegRegOp",
3362 { "code": code,
3363 "predicate_test": predicateTest,
3364 "op_class": opClass }, [])
3365 header_output += RegRegRegOpDeclare.subst(iop)
3366 decoder_output += RegRegRegOpConstructor.subst(iop)
3367 exec_output += PredOpExecute.subst(iop)
3368
3369 buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
3370 buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
3371 buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
3372 buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
3373
3374 buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
3375 buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
3376 buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
3377 buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")
3378}};