neon.isa (12038:619bc4100aa8) neon.isa (13544:0b4e5446167c)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061let {{
# Both output strings start empty; the code below appends generated text to
# them.
1062 header_output = ""
1063 exec_output = ""
1064
# C++ source text for three static float helpers: vcgtFunc (>), vcgeFunc (>=)
# and vceqFunc (==). Each returns 2.0 when an operand fails its NaN check,
# otherwise 0.0 if the comparison holds and 1.0 if it does not. vcgtFunc and
# vcgeFunc test with std::isnan; vceqFunc tests with isSnan instead.
1065 vcompares = '''
1066 static float
1067 vcgtFunc(float op1, float op2)
1068 {
1069 if (std::isnan(op1) || std::isnan(op2))
1070 return 2.0;
1071 return (op1 > op2) ? 0.0 : 1.0;
1072 }
1073
1074 static float
1075 vcgeFunc(float op1, float op2)
1076 {
1077 if (std::isnan(op1) || std::isnan(op2))
1078 return 2.0;
1079 return (op1 >= op2) ? 0.0 : 1.0;
1080 }
1081
1082 static float
1083 vceqFunc(float op1, float op2)
1084 {
1085 if (isSnan(op1) || isSnan(op2))
1086 return 2.0;
1087 return (op1 == op2) ? 0.0 : 1.0;
1088 }
1089'''
# C++ source text for vcleFunc (<=) and vcltFunc (<), using the same
# 2.0 / 0.0 / 1.0 return convention. This string is defined here but is not
# appended to exec_output within this part of the file.
1090 vcomparesL = '''
1091 static float
1092 vcleFunc(float op1, float op2)
1093 {
1094 if (std::isnan(op1) || std::isnan(op2))
1095 return 2.0;
1096 return (op1 <= op2) ? 0.0 : 1.0;
1097 }
1098
1099 static float
1100 vcltFunc(float op1, float op2)
1101 {
1102 if (std::isnan(op1) || std::isnan(op2))
1103 return 2.0;
1104 return (op1 < op2) ? 0.0 : 1.0;
1105 }
1106'''
# C++ source text for vacgtFunc and vacgeFunc, which compare the fabsf()
# magnitudes of the operands (> and >=) with the same return convention.
1107 vacomparesG = '''
1108 static float
1109 vacgtFunc(float op1, float op2)
1110 {
1111 if (std::isnan(op1) || std::isnan(op2))
1112 return 2.0;
1113 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1114 }
1115
1116 static float
1117 vacgeFunc(float op1, float op2)
1118 {
1119 if (std::isnan(op1) || std::isnan(op2))
1120 return 2.0;
1121 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1122 }
1123'''
1124
# Only the vcompares and vacomparesG helper text is appended to exec_output
# here.
1125 exec_output += vcompares + vacomparesG
1126
# Tuples of C++ element type names. The "small" tuples hold the 8/16/32-bit
# names; the full tuples add the 64-bit name. smallTypes and allTypes
# concatenate the unsigned tuple followed by the signed one.
1127 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1128 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1129 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1130 signedTypes = smallSignedTypes + ("int64_t",)
1131 smallTypes = smallUnsignedTypes + smallSignedTypes
1132 allTypes = unsignedTypes + signedTypes
1133
# Generate the declaration and execute code for a three-register
# instruction (two sources, one destination) that walks its registers
# element by element.
#
#   name, Name - mnemonic and class name passed to InstObjParams
#   opClass    - value stored under "op_class"
#   types      - element type names; one NeonExecDeclare is emitted per type
#   rCount     - number of register pieces read and written per operand
#   op         - C++ snippet substituted into the per-element loop body
#                (presumably it assigns destElem, which is stored right
#                after it -- confirm against callers)
#   readDest   - when true, the destination is loaded before the loop and
#                destElem is seeded from it
#   pairwise   - when true, each result is computed from two adjacent
#                source elements instead of same-index elements
1134 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135 readDest=False, pairwise=False):
1136 global header_output, exec_output
# Begin with the SIMD-enabled check, then declare the register images.
1137 eWalkCode = simdEnabledCheckCode + '''
1138 RegVect srcReg1, srcReg2, destReg;
1139 '''
# Copy every piece of both source operands into the register images, and
# the destination pieces too when the old destination value is needed.
1140 for reg in range(rCount):
1141 eWalkCode += '''
1142 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1143 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1144 ''' % { "reg" : reg }
1145 if readDest:
1146 eWalkCode += '''
1147 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1148 ''' % { "reg" : reg }
# Optional statement that seeds destElem from the current destination.
1149 readDestCode = ''
1150 if readDest:
1151 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
# Pairwise form: result i uses elements 2*i and 2*i+1 of srcReg1 while
# 2*i < eCount, and the same indices minus eCount of srcReg2 after that.
1152 if pairwise:
1153 eWalkCode += '''
1154 for (unsigned i = 0; i < eCount; i++) {
1155 Element srcElem1 = gtoh(2 * i < eCount ?
1156 srcReg1.elements[2 * i] :
1157 srcReg2.elements[2 * i - eCount]);
1158 Element srcElem2 = gtoh(2 * i < eCount ?
1159 srcReg1.elements[2 * i + 1] :
1160 srcReg2.elements[2 * i + 1 - eCount]);
1161 Element destElem;
1162 %(readDest)s
1163 %(op)s
1164 destReg.elements[i] = htog(destElem);
1165 }
1166 ''' % { "op" : op, "readDest" : readDestCode }
# Element-wise form: result i uses element i of each source.
1167 else:
1168 eWalkCode += '''
1169 for (unsigned i = 0; i < eCount; i++) {
1170 Element srcElem1 = gtoh(srcReg1.elements[i]);
1171 Element srcElem2 = gtoh(srcReg2.elements[i]);
1172 Element destElem;
1173 %(readDest)s
1174 %(op)s
1175 destReg.elements[i] = htog(destElem);
1176 }
1177 ''' % { "op" : op, "readDest" : readDestCode }
# Write the computed register image back to the destination pieces.
1178 for reg in range(rCount):
1179 eWalkCode += '''
1180 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1181 ''' % { "reg" : reg }
# Build the instruction description on the "RegRegRegOp" base and emit the
# class declaration and the execute template.
1182 iop = InstObjParams(name, Name,
1183 "RegRegRegOp",
1184 { "code": eWalkCode,
1185 "r_count": rCount,
1186 "predicate_test": predicateTest,
1187 "op_class": opClass }, [])
1188 header_output += NeonRegRegRegOpDeclare.subst(iop)
1189 exec_output += NeonEqualRegExecute.subst(iop)
# Emit one NeonExecDeclare substitution per requested element type.
1190 for type in types:
1191 substDict = { "targs" : type,
1192 "class_name" : Name }
1193 exec_output += NeonExecDeclare.subst(substDict)
1194
1195 def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1196 readDest=False, pairwise=False, toInt=False):
1197 global header_output, exec_output
1198 eWalkCode = simdEnabledCheckCode + '''
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061let {{
# Accumulators for the generated C++ declarations and execute code.
header_output = exec_output = ""

# C++ helpers for the floating point compares. Each returns 0.0 when the
# comparison holds, 1.0 when it does not, and 2.0 when an operand is a NaN
# (vceqFunc only treats operands flagged by isSnan that way).
vcompares = '''
static float
vcgtFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (op1 > op2) ? 0.0 : 1.0;
}

static float
vcgeFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (op1 >= op2) ? 0.0 : 1.0;
}

static float
vceqFunc(float op1, float op2)
{
    if (isSnan(op1) || isSnan(op2))
        return 2.0;
    return (op1 == op2) ? 0.0 : 1.0;
}
'''

# "Less than" flavours of the helpers above. They are defined here but not
# appended to exec_output in this part of the file.
vcomparesL = '''
static float
vcleFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (op1 <= op2) ? 0.0 : 1.0;
}

static float
vcltFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (op1 < op2) ? 0.0 : 1.0;
}
'''

# Absolute-value compares: same return convention, applied to fabsf().
vacomparesG = '''
static float
vacgtFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
}

static float
vacgeFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
}
'''

exec_output = exec_output + vcompares + vacomparesG

# Element type lists used to pick the explicit template instantiations.
# The signed names are the unsigned ones without their leading "u".
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = tuple(uName[1:] for uName in smallUnsignedTypes)
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1133
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    '''Generate a NEON instruction with two sources and one destination
    that all have the same element type.

    op is the C++ snippet computing destElem from srcElem1/srcElem2.
    rCount is the number of 32-bit register pieces per operand. With
    readDest the previous destination value is loaded into destElem
    before op runs. With pairwise the two operands of element i are
    adjacent elements taken from the concatenation of source 1 and
    source 2. One explicit instantiation is emitted per entry in types.
    '''
    global header_output, exec_output

    loadDestElem = ''
    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    # Copy the operand registers into the local vector views.
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx }
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }

    # Element loop; the two variants differ only in operand selection.
    if pairwise:
        loopTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        loopTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    walk += loopTemplate % { "op" : op, "readDest" : loadDestElem }

    # Write the result back to the destination registers.
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1194
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    '''Generate a floating point NEON instruction with two sources and one
    destination, processed one 32-bit register at a time.

    op is the C++ snippet computing destReg from srcReg1/srcReg2. With
    readDest the previous destination value is loaded into destReg first.
    With pairwise the two operands of register r are adjacent registers
    taken from the concatenation of source 1 and source 2. With toInt the
    result is a raw uint32_t written through the _uw view of the
    destination instead of a float. One explicit instantiation is emitted
    per entry in types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # NOTE(review): this reads through ".bits" while the
                # write-back below uses the "_uw" view - confirm that the
                # toInt + readDest combination is valid/used.
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    # Type and store statement of the per-register result.
    destType = 'float'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'uint32_t'
        writeDest = 'destRegs.regs[r] = destReg;'
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            float srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            float srcReg1 = srcRegs1[r];
            float srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    # Write the result back to the destination registers.
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1279
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    '''Generate a NEON instruction with two sources and one destination
    whose operands may differ in width.

    Each of bigSrc1/bigSrc2/bigDest selects the "Big" vector and element
    types and four 32-bit register pieces for that operand instead of
    two. op is the C++ snippet computing destElem from srcElem1/srcElem2;
    with readDest the previous destination value is loaded into destElem
    first. One explicit instantiation is emitted per entry in types.
    '''
    global header_output, exec_output

    # (register pieces, type prefix) for each operand.
    src1Cnt, src1Prefix = (4, 'Big') if bigSrc1 else (2, '')
    src2Cnt, src2Prefix = (4, 'Big') if bigSrc2 else (2, '')
    destCnt, destPrefix = (4, 'Big') if bigDest else (2, '')

    walk = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for idx in range(src1Cnt):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    for idx in range(src2Cnt):
        walk += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx }

    loadDestElem = ''
    if readDest:
        for idx in range(destCnt):
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    # NOTE(review): srcElem2 is declared with src1Prefix rather than
    # src2Prefix, so it takes the width of the first source - confirm
    # this is intentional before changing it.
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }

    # Write the result back to the destination registers.
    for idx in range(destCnt):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1343
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    # Narrowing form: both sources are "Big", the destination is not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1347
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Long form: only the destination is "Big".
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1351
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    # Wide form: the first source and the destination are "Big".
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1355
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction that combines every element of the first
    source with one element of the second source selected by imm.

    op is the C++ snippet computing destElem from srcElem1/srcElem2.
    rCount is the number of 32-bit register pieces per operand. With
    readDest the previous destination value is loaded into destElem
    first. An imm outside [0, eCount) raises UndefinedInstruction. One
    explicit instantiation is emitted per entry in types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && it could never be true and an
    # out-of-range imm would index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    # Write the result back to the destination registers.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1404
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a "long" NEON instruction that combines every element of
    the first source with one element of the second source selected by
    imm, producing BigElement results.

    The sources use two 32-bit register pieces each and the destination
    four. op is the C++ snippet computing destElem from
    srcElem1/srcElem2; with readDest the previous destination value is
    loaded into destElem first. An imm outside [0, eCount) raises
    UndefinedInstruction. One explicit instantiation is emitted per entry
    in types.
    '''
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        # The destination is twice as wide as the sources.
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && it could never be true and an
    # out-of-range imm would index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    # Write the result back to the destination registers.
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1456
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a floating point NEON instruction that combines every
    register of the first source with the register of the second source
    selected by imm.

    op is the C++ snippet computing destReg from srcReg1/srcReg2. rCount
    is the number of 32-bit registers per operand. With readDest the
    previous destination value is loaded into destReg first. An imm
    outside [0, eCount) raises UndefinedInstruction. One explicit
    instantiation is emitted per entry in types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use ||: with && it could never be true and an
    # out-of-range imm would index past the end of srcRegs2.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            float srcReg1 = srcRegs1[i];
            float srcReg2 = srcRegs2[imm];
            float destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    # Write the result back to the destination registers.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1506
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    '''Generate a NEON instruction with one source register, one
    destination register and an immediate.

    op is the C++ snippet executed once per loop iteration. By default
    it reads srcElem1 and writes destElem (both of type Element). With
    fromInt the source is instead exposed as the raw uint32_t srcReg1,
    and with toInt the result is the raw uint32_t destReg. With readDest
    the previous destination value is loaded before op runs. rCount is
    the number of 32-bit register pieces per operand. One explicit
    instantiation is emitted per entry in types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    # How the previous destination value is loaded, if at all.
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        if toInt:
            readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    # How the source operand is presented to op.
    readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);'
    # How the result is declared and stored.
    declDest = 'Element destElem;'
    writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if toInt:
        declDest = 'uint32_t destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    # Write the result back to the destination registers.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1563
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a narrowing NEON instruction with one source register, one
    destination register and an immediate.

    The source is a BigRegVect read from four 32-bit register pieces and
    the destination a RegVect written to two. op is the C++ snippet
    computing destElem (Element) from srcElem1 (BigElement); with
    readDest the previous destination value is loaded into destElem
    first. One explicit instantiation is emitted per entry in types.
    '''
    global header_output, exec_output
    srcPieces, destPieces = 4, 2

    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcPieces):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }

    loadDestElem = ''
    if readDest:
        for idx in range(destPieces):
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem }

    # Write the result back to the destination registers.
    for idx in range(destPieces):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1607
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    '''Generate a lengthening NEON instruction with one source register,
    one destination register and an immediate.

    The source is a RegVect read from two 32-bit register pieces and the
    destination a BigRegVect written to four. op is the C++ snippet
    computing destElem (BigElement) from srcElem1 (Element); with
    readDest the previous destination value is loaded into destElem
    first. One explicit instantiation is emitted per entry in types.
    '''
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Load the per-element accumulator; assigning to destReg here
        # would target the whole vector instead of destElem.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    # Write the result back to the destination registers.
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1651
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction with one source and one destination
    register of the same element type.

    op is the C++ snippet computing destElem from srcElem1. The result
    is stored at index j, which starts out equal to the loop index i and
    is in scope for op. With readDest the previous destination value is
    loaded into destElem first. rCount is the number of 32-bit register
    pieces per operand. One explicit instantiation is emitted per entry
    in types.
    '''
    global header_output, exec_output

    loadDestElem = ''
    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }

    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem }

    # Write the result back to the destination registers.
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1694
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    '''Generate a NEON instruction whose single source operand is the
    element of the source register selected by imm.

    op is the C++ snippet computing destElem from srcElem1, which holds
    srcReg1.elements[imm] on every iteration. With readDest the previous
    destination value is loaded into destElem first. rCount is the
    number of 32-bit register pieces per operand. One explicit
    instantiation is emitted per entry in types.
    '''
    global header_output, exec_output

    loadDestElem = ''
    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx }

    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem }

    # Write the result back to the destination registers.
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1736
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a two-register NEON instruction that rewrites both operands.

    Unlike the per-element generators, `op` is inserted verbatim between
    the register loads and the write-back: it operates directly on the
    `srcReg1` and `destReg` RegVects, and both are written back afterwards
    (FpDestP<n>_uw and FpOp1P<n>_uw).

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of registers loaded and written back per operand.
    op         -- complete C++ snippet working on srcReg1 / destReg.
    readDest   -- accepted for signature compatibility only. The
                  destination registers are always loaded, so the flag has
                  no effect on the generated instruction.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    # Source and destination are loaded unconditionally because op may read
    # and modify either of them.
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += op
    # Both register groups are written back, not just the destination.
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1771
1772 def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1773 readDest=False, toInt=False):
1774 global header_output, exec_output
1775 eWalkCode = simdEnabledCheckCode + '''
1533 writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1534 eWalkCode += '''
1535 for (unsigned i = 0; i < eCount; i++) {
1536 %(readOp)s
1537 %(declDest)s
1538 %(readDest)s
1539 %(op)s
1540 %(writeDest)s
1541 }
1542 ''' % { "readOp" : readOpCode,
1543 "declDest" : declDest,
1544 "readDest" : readDestCode,
1545 "op" : op,
1546 "writeDest" : writeDestCode }
1547 for reg in range(rCount):
1548 eWalkCode += '''
1549 FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1550 ''' % { "reg" : reg }
1551 iop = InstObjParams(name, Name,
1552 "RegRegImmOp",
1553 { "code": eWalkCode,
1554 "r_count": rCount,
1555 "predicate_test": predicateTest,
1556 "op_class": opClass }, [])
1557 header_output += NeonRegRegImmOpDeclare.subst(iop)
1558 exec_output += NeonEqualRegExecute.subst(iop)
1559 for type in types:
1560 substDict = { "targs" : type,
1561 "class_name" : Name }
1562 exec_output += NeonExecDeclare.subst(substDict)
1563
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a narrowing two-register NEON instruction with an immediate.

    The generated C++ loads four source registers into a BigRegVect, runs
    `op` per element with a BigElement `srcElem1` and an Element
    `destElem`, and writes two destination registers back.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    op         -- C++ snippet computing `destElem` from `srcElem1`.
    readDest   -- when true, the two destination registers are loaded first
                  and destElem starts out as the old destination element.
    """
    global header_output, exec_output

    srcRegCount = 4
    destRegCount = 2

    body = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for regIdx in range(srcRegCount):
        body += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : regIdx }

    preload = ''
    if readDest:
        for regIdx in range(destRegCount):
            body += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : regIdx }
        preload = 'destElem = gtoh(destReg.elements[i]);'

    body += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload }

    for regIdx in range(destRegCount):
        body += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": body,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1607
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a lengthening two-register NEON instruction with an immediate.

    The generated C++ loads two source registers into a RegVect, runs `op`
    per element with an Element `srcElem1` and a BigElement `destElem`,
    and writes four destination registers back from a BigRegVect.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    op         -- C++ snippet computing `destElem` from `srcElem1`.
    readDest   -- when true, the four destination registers are loaded
                  first and destElem starts out as the old destination
                  element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The preload must target the per-element temporary. Assigning to
        # destReg (the whole BigRegVect) would leave destElem unset.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1651
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a two-register NEON instruction whose operands have equal width.

    The generated C++ copies rCount source registers (FpOp1P<n>_uw) into a
    RegVect, runs `op` once per element with `srcElem1` as input and
    `destElem` as output, and writes rCount destination registers back.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of registers read from the source and written to
                  the destination.
    op         -- C++ snippet placed in the element loop. It may also
                  modify `j`, the destination element index (initially i).
    readDest   -- when true, the destination registers are loaded first and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    chunks = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for regIdx in range(rCount):
        regArgs = { "reg" : regIdx }
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % regArgs)
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % regArgs)

    # Only preload destElem when the caller asked for the old value.
    preload = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })

    for regIdx in range(rCount):
        chunks.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1694
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a two-register NEON instruction that reads one scalar element.

    Same shape as the other two-register generators, except that every
    loop iteration reads the source element selected by the immediate
    (`srcReg1.elements[imm]`) and the instruction uses the RegRegImmOp
    base class.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of source and destination registers copied.
    op         -- C++ snippet computing `destElem` from `srcElem1`.
    readDest   -- when true, the destination registers are loaded first and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    chunks = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for regIdx in range(rCount):
        regArgs = { "reg" : regIdx }
        chunks.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % regArgs)
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % regArgs)

    preload = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })

    for regIdx in range(rCount):
        chunks.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1736
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a two-register NEON instruction that rewrites both operands.

    Unlike the per-element generators, `op` is inserted verbatim between
    the register loads and the write-back: it operates directly on the
    `srcReg1` and `destReg` RegVects, and both are written back afterwards
    (FpDestP<n>_uw and FpOp1P<n>_uw).

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of registers loaded and written back per operand.
    op         -- complete C++ snippet working on srcReg1 / destReg.
    readDest   -- accepted for signature compatibility only. The
                  destination registers are always loaded, so the flag has
                  no effect on the generated instruction.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    # Source and destination are loaded unconditionally because op may read
    # and modify either of them.
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += op
    # Both register groups are written back, not just the destination.
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1771
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit a two-register NEON floating-point instruction.

    The generated C++ works one register at a time rather than one element
    at a time: each of the rCount source registers is read as a float
    (`srcReg1`), `op` computes `destReg`, and the result is stored in the
    matching destination register.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of registers processed.
    op         -- C++ snippet computing `destReg` from `srcReg1`.
    readDest   -- when true, the destination registers are loaded first and
                  destReg starts out as the old destination register.
    toInt      -- when true, the result is a raw 32-bit integer: destReg is
                  a uint32_t, results are collected in a RegVect and
                  written through FpDestP<n>_uw instead of FpDestP<n>.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef float FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # NOTE(review): the write-back below uses FpDestP<n>_uw for
                # the raw bits while this read uses ".bits" -- confirm the
                # operand syntax is still valid for the readDest+toInt case.
                eWalkCode += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
    ''' % { "reg" : reg }
            else:
                eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    destType = 'float'
    writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        # The register loop below counts with `r`; there is no `i` in scope.
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'uint32_t'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            # destRegs is a RegVect here, so go through its regs member
            # exactly as the write side does.
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        float srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
    FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
    ''' % { "reg" : reg }
        else:
            eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1837
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a two-register NEON instruction that combines adjacent pairs.

    The generated C++ walks eCount / 2 destination elements. Iteration i
    reads source elements 2*i and 2*i+1 as `srcElem1` / `srcElem2`
    (Element) and lets `op` produce one BigElement `destElem`.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of source and destination registers copied.
    op         -- C++ snippet computing `destElem` from the element pair.
    readDest   -- when true, the destination registers are loaded first and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    chunks = [simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for regIdx in range(rCount):
        regArgs = { "reg" : regIdx }
        chunks.append('''
    srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % regArgs)
        if readDest:
            chunks.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % regArgs)

    preload = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    chunks.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })

    for regIdx in range(rCount):
        chunks.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1881
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a narrowing two-register NEON instruction (no immediate).

    The generated C++ loads four source registers into a BigRegVect, runs
    `op` per element with a BigElement `srcElem1` and an Element
    `destElem`, and writes two destination registers back.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    op         -- C++ snippet computing `destElem` from `srcElem1`.
    readDest   -- when true, the two destination registers are loaded first
                  and destElem starts out as the old destination element.
    """
    global header_output, exec_output

    srcRegCount = 4
    destRegCount = 2

    body = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for regIdx in range(srcRegCount):
        body += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : regIdx }

    preload = ''
    if readDest:
        for regIdx in range(destRegCount):
            body += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : regIdx }
        preload = 'destElem = gtoh(destReg.elements[i]);'

    body += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload }

    for regIdx in range(destRegCount):
        body += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": body,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1925
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction with a destination register and an immediate.

    The generated C++ has no source register: `op` fills in `destElem` for
    every element and the rCount destination registers are written back.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    rCount     -- number of destination registers.
    op         -- C++ snippet computing `destElem`.
    readDest   -- when true, the destination registers are loaded first and
                  destElem starts out as the old destination element.
    """
    global header_output, exec_output

    body = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    preload = ''
    if readDest:
        for regIdx in range(rCount):
            body += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : regIdx }
        preload = 'destElem = gtoh(destReg.elements[i]);'

    body += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload }

    for regIdx in range(rCount):
        body += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx }

    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code": body,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1963
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a lengthening two-register NEON instruction (no immediate).

    The generated C++ loads two source registers into a RegVect, runs `op`
    per element with an Element `srcElem1` and a BigElement `destElem`,
    and writes four destination registers back from a BigRegVect.

    name, Name -- forwarded to InstObjParams; Name is also the class name
                  used for the per-type NeonExecDeclare instantiations.
    opClass    -- stored as "op_class" in the instruction parameters.
    types      -- element types; one NeonExecDeclare is emitted for each.
    op         -- C++ snippet computing `destElem` from `srcElem1`.
    readDest   -- when true, the four destination registers are loaded
                  first and destElem starts out as the old destination
                  element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The preload must target the per-element temporary. Assigning to
        # destReg (the whole BigRegVect) would leave destElem unset.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2007
# Halving add/subtract. Each *Code string is the per-element C++ body handed
# to threeEqualRegInst (defined outside this section); it reads srcElem1 and
# srcElem2 and must assign destElem. Every instruction is registered twice,
# with 2 and with 4 as the numeric argument -- presumably the register count
# of the D and Q forms; confirm against threeEqualRegInst.

# vhadd: (srcElem1 + srcElem2) / 2 without widening. Each operand is halved
# with its low bit masked off, and the carry out of the two low bits is added
# back in.
2008 vhaddCode = '''
2009 Element carryBit =
2010 (((unsigned)srcElem1 & 0x1) +
2011 ((unsigned)srcElem2 & 0x1)) >> 1;
2012 // Use division instead of a shift to ensure the sign extension works
2013 // right. The compiler will figure out if it can be a shift. Mask the
2014 // inputs so they get truncated correctly.
2015 destElem = (((srcElem1 & ~(Element)1) / 2) +
2016 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2017 '''
2018 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2019 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2020
# vrhadd: same as vhadd, but an extra 1 is added to the low-bit sum before it
# is shifted, which rounds the halved result.
2021 vrhaddCode = '''
2022 Element carryBit =
2023 (((unsigned)srcElem1 & 0x1) +
2024 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2025 // Use division instead of a shift to ensure the sign extension works
2026 // right. The compiler will figure out if it can be a shift. Mask the
2027 // inputs so they get truncated correctly.
2028 destElem = (((srcElem1 & ~(Element)1) / 2) +
2029 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2030 '''
2031 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2032 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2033
# vhsub: (srcElem1 - srcElem2) / 2. "barrowBit" is the borrow out of the low
# bits: it is 1 only when srcElem1's low bit is 0 and srcElem2's is 1.
2034 vhsubCode = '''
2035 Element barrowBit =
2036 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2037 // Use division instead of a shift to ensure the sign extension works
2038 // right. The compiler will figure out if it can be a shift. Mask the
2039 // inputs so they get truncated correctly.
2040 destElem = (((srcElem1 & ~(Element)1) / 2) -
2041 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2042 '''
2043 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2044 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2045
# Bitwise operations. Each *Code string is the per-element C++ body handed to
# threeEqualRegInst (defined outside this section); it combines srcElem1 and
# srcElem2 into destElem. All of them are instantiated for unsignedTypes only.

# vand: bitwise AND.
2046 vandCode = '''
2047 destElem = srcElem1 & srcElem2;
2048 '''
2049 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2050 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2051
# vbic: bit clear -- srcElem1 with the bits set in srcElem2 cleared.
2052 vbicCode = '''
2053 destElem = srcElem1 & ~srcElem2;
2054 '''
2055 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2056 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2057
# vorr: bitwise OR.
2058 vorrCode = '''
2059 destElem = srcElem1 | srcElem2;
2060 '''
2061 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2062 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2063
# vmov reuses the OR body under a different mnemonic and op class.
# NOTE(review): presumably the decoder selects these when both source
# registers are the same, making the OR a plain copy -- confirm.
2064 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2065 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2066
# vorn: OR with the complement of srcElem2.
2067 vornCode = '''
2068 destElem = srcElem1 | ~srcElem2;
2069 '''
2070 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2071 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2072
# veor: bitwise exclusive OR.
2073 veorCode = '''
2074 destElem = srcElem1 ^ srcElem2;
2075 '''
2076 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2077 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2078
# The three bit-select variants below read the old destElem, so they pass an
# extra True -- presumably threeEqualRegInst's readDest flag; confirm.

# vbif: keep the destination bit where srcElem2 is 1, take srcElem1's bit
# where srcElem2 is 0.
2079 vbifCode = '''
2080 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2081 '''
2082 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2083 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: take srcElem1's bit where srcElem2 is 1, keep the destination bit
# where srcElem2 is 0.
2084 vbitCode = '''
2085 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2086 '''
2087 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2088 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination is the selector -- srcElem1's bit where the
# destination bit is 1, srcElem2's bit where it is 0.
2089 vbslCode = '''
2090 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2091 '''
2092 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2093 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2094
# Min/max and plain (wrapping) add/subtract, including the long, wide and
# narrowing forms. The generator functions called here (threeEqualRegInst,
# threeRegLongInst, threeRegWideInst, threeRegNarrowInst) are defined outside
# this section.

# vmax: the larger of the two elements.
2095 vmaxCode = '''
2096 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2097 '''
2098 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2099 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2100
# vmin: the smaller of the two elements.
2101 vminCode = '''
2102 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2103 '''
2104 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2105 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
# vadd: element-wise add at element width.
2107 vaddCode = '''
2108 destElem = srcElem1 + srcElem2;
2109 '''
2110 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2111 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2112
# vpadd shares the add body; pairwise=True asks threeEqualRegInst for the
# pairwise operand arrangement. Only a D-sized (2) form is registered.
2113 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2114 2, vaddCode, pairwise=True)
# vaddl / vaddw: the add is done at BigElement width; one body serves both
# the long and the wide generator.
2115 vaddlwCode = '''
2116 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2117 '''
2118 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2119 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: add at BigElement width and keep the bits above the low
# sizeof(Element) * 8 bits (the high half of the sum).
2120 vaddhnCode = '''
2121 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2122 (sizeof(Element) * 8);
2123 '''
2124 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but a 1 in the top bit position of the discarded half
# is added first, rounding the result.
2125 vraddhnCode = '''
2126 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2127 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2128 (sizeof(Element) * 8);
2129 '''
2130 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
# vsub: element-wise subtract at element width.
2132 vsubCode = '''
2133 destElem = srcElem1 - srcElem2;
2134 '''
2135 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2136 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: the subtract is done at BigElement width; one body serves
# both the long and the wide generator.
2137 vsublwCode = '''
2138 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2139 '''
2140 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2141 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
# Saturating add/subtract plus the narrowing subtracts. The saturating bodies
# read FpscrQc into a local FPSCR, set its qc field when they saturate, and
# write it back.

# vqadd (unsigned): if the wrapped sum is smaller than either operand the add
# overflowed, so the result becomes all ones.
2143 vqaddUCode = '''
2144 destElem = srcElem1 + srcElem2;
2145 FPSCR fpscr = (FPSCR) FpscrQc;
2146 if (destElem < srcElem1 || destElem < srcElem2) {
2147 destElem = (Element)(-1);
2148 fpscr.qc = 1;
2149 }
2150 FpscrQc = fpscr;
2151 '''
2152 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2153 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: subtract at BigElement width and keep the bits above the low
# sizeof(Element) * 8 bits (the high half of the difference).
2154 vsubhnCode = '''
2155 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2156 (sizeof(Element) * 8);
2157 '''
2158 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but a 1 in the top bit position of the discarded half
# is added first, rounding the result.
2159 vrsubhnCode = '''
2160 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2161 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2162 (sizeof(Element) * 8);
2163 '''
2164 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
# vqadd (signed): overflow is detected from the signs -- both operands share
# a sign and the result's sign differs -- and the result saturates to the
# Element maximum or minimum.
2166 vqaddSCode = '''
2167 destElem = srcElem1 + srcElem2;
2168 FPSCR fpscr = (FPSCR) FpscrQc;
2169 bool negDest = (destElem < 0);
2170 bool negSrc1 = (srcElem1 < 0);
2171 bool negSrc2 = (srcElem2 < 0);
2172 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2173 if (negDest)
2174 /* If (>=0) plus (>=0) yields (<0), saturate to +. */
2175 destElem = std::numeric_limits<Element>::max();
2176 else
2177 /* If (<0) plus (<0) yields (>=0), saturate to -. */
2178 destElem = std::numeric_limits<Element>::min();
2179 fpscr.qc = 1;
2180 }
2181 FpscrQc = fpscr;
2182 '''
2183 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2184 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2185
# vqsub (unsigned): a wrapped difference larger than srcElem1 means the
# subtract went below zero, so the result becomes 0.
2186 vqsubUCode = '''
2187 destElem = srcElem1 - srcElem2;
2188 FPSCR fpscr = (FPSCR) FpscrQc;
2189 if (destElem > srcElem1) {
2190 destElem = 0;
2191 fpscr.qc = 1;
2192 }
2193 FpscrQc = fpscr;
2194 '''
2195 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2196 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2197
# vqsub (signed): overflow is only possible when the operands have opposite
# signs (negSrc1 == posSrc2) and the result's sign differs from srcElem1's;
# the result then saturates to the Element maximum or minimum.
2198 vqsubSCode = '''
2199 destElem = srcElem1 - srcElem2;
2200 FPSCR fpscr = (FPSCR) FpscrQc;
2201 bool negDest = (destElem < 0);
2202 bool negSrc1 = (srcElem1 < 0);
2203 bool posSrc2 = (srcElem2 >= 0);
2204 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2205 if (negDest)
2206 /* If (>=0) minus (<0) yields (<0), saturate to +. */
2207 destElem = std::numeric_limits<Element>::max();
2208 else
2209 /* If (<0) minus (>=0) yields (>=0), saturate to -. */
2210 destElem = std::numeric_limits<Element>::min();
2211 fpscr.qc = 1;
2212 }
2213 FpscrQc = fpscr;
2214 '''
2215 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2216 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2217
# Element-wise compares. Each body sets destElem to all ones ((Element)(-1))
# when the comparison holds and to 0 otherwise.

# vcgt: srcElem1 > srcElem2.
2218 vcgtCode = '''
2219 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2220 '''
2221 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2222 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2223
# vcge: srcElem1 >= srcElem2.
2224 vcgeCode = '''
2225 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2226 '''
2227 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2228 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2229
# vceq: srcElem1 == srcElem2. Registered for unsignedTypes only, unlike the
# ordered compares above which use allTypes.
2230 vceqCode = '''
2231 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2232 '''
2233 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2234 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2235
# Register-controlled shifts. The shift amount is the low byte of srcElem2
# interpreted as signed ((int8_t)): a negative amount shifts right by its
# magnitude, a non-negative amount shifts left. ltz() is a helper defined
# outside this section -- presumably "less than zero"; confirm.

# vshl: plain shift. Right shifts of at least the element width give 0 before
# the sign fix-up; the fix-up then ORs in the high bits when the source was
# negative but the shifted value is not, so signed right shifts sign-extend.
# Left shifts of at least the element width give 0.
2236 vshlCode = '''
2237 int16_t shiftAmt = (int8_t)srcElem2;
2238 if (shiftAmt < 0) {
2239 shiftAmt = -shiftAmt;
2240 if (shiftAmt >= sizeof(Element) * 8) {
2241 shiftAmt = sizeof(Element) * 8 - 1;
2242 destElem = 0;
2243 } else {
2244 destElem = (srcElem1 >> shiftAmt);
2245 }
2246 // Make sure the right shift sign extended when it should.
2247 if (ltz(srcElem1) && !ltz(destElem)) {
2248 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2249 1 - shiftAmt));
2250 }
2251 } else {
2252 if (shiftAmt >= sizeof(Element) * 8) {
2253 destElem = 0;
2254 } else {
2255 destElem = srcElem1 << shiftAmt;
2256 }
2257 }
2258 '''
2259 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2260 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2261
# vrshl: rounding shift. For right shifts rBit is the last bit shifted out
# (bit shiftAmt - 1 of the source), or 1 when a negative source is shifted by
# more than the element width; it is added to the shifted value. A zero shift
# copies the source unchanged.
# NOTE(review): vshl above uses "SimdShiftOp" while this uses "SimdAluOp" --
# confirm the op class difference is intended.
2262 vrshlCode = '''
2263 int16_t shiftAmt = (int8_t)srcElem2;
2264 if (shiftAmt < 0) {
2265 shiftAmt = -shiftAmt;
2266 Element rBit = 0;
2267 if (shiftAmt <= sizeof(Element) * 8)
2268 rBit = bits(srcElem1, shiftAmt - 1);
2269 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2270 rBit = 1;
2271 if (shiftAmt >= sizeof(Element) * 8) {
2272 shiftAmt = sizeof(Element) * 8 - 1;
2273 destElem = 0;
2274 } else {
2275 destElem = (srcElem1 >> shiftAmt);
2276 }
2277 // Make sure the right shift sign extended when it should.
2278 if (ltz(srcElem1) && !ltz(destElem)) {
2279 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2280 1 - shiftAmt));
2281 }
2282 destElem += rBit;
2283 } else if (shiftAmt > 0) {
2284 if (shiftAmt >= sizeof(Element) * 8) {
2285 destElem = 0;
2286 } else {
2287 destElem = srcElem1 << shiftAmt;
2288 }
2289 } else {
2290 destElem = srcElem1;
2291 }
2292 '''
2293 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2294 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2295
# Saturating register-controlled shifts. As with the other shifts the amount
# is the signed low byte of srcElem2; negative amounts shift right. Left
# shifts that would lose significant bits saturate and set the qc field of
# the FPSCR value read from and written back to FpscrQc.

# vqshl (unsigned): a left shift saturates to all ones when any of the top
# shiftAmt bits of the source is set, or when a non-zero source is shifted by
# at least the element width. A zero shift copies the source unchanged.
2296 vqshlUCode = '''
2297 int16_t shiftAmt = (int8_t)srcElem2;
2298 FPSCR fpscr = (FPSCR) FpscrQc;
2299 if (shiftAmt < 0) {
2300 shiftAmt = -shiftAmt;
2301 if (shiftAmt >= sizeof(Element) * 8) {
2302 shiftAmt = sizeof(Element) * 8 - 1;
2303 destElem = 0;
2304 } else {
2305 destElem = (srcElem1 >> shiftAmt);
2306 }
2307 } else if (shiftAmt > 0) {
2308 if (shiftAmt >= sizeof(Element) * 8) {
2309 if (srcElem1 != 0) {
2310 destElem = mask(sizeof(Element) * 8);
2311 fpscr.qc = 1;
2312 } else {
2313 destElem = 0;
2314 }
2315 } else {
2316 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2317 sizeof(Element) * 8 - shiftAmt)) {
2318 destElem = mask(sizeof(Element) * 8);
2319 fpscr.qc = 1;
2320 } else {
2321 destElem = srcElem1 << shiftAmt;
2322 }
2323 }
2324 } else {
2325 destElem = srcElem1;
2326 }
2327 FpscrQc = fpscr;
2328 '''
2329 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2330 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2331
# vqshl (signed): right shifts sign-extend via the same fix-up as vshl. A
# left shift saturates when the top shiftAmt + 1 bits of the source are not
# all copies of the sign bit, or when a non-zero source is shifted by at
# least the element width; the saturated value is the largest positive
# pattern, complemented for negative sources.
# NOTE(review): the unsigned variant above registers with "SimdAluOp" while
# this one uses "SimdCmpOp" -- confirm the op class difference is intended.
2332 vqshlSCode = '''
2333 int16_t shiftAmt = (int8_t)srcElem2;
2334 FPSCR fpscr = (FPSCR) FpscrQc;
2335 if (shiftAmt < 0) {
2336 shiftAmt = -shiftAmt;
2337 if (shiftAmt >= sizeof(Element) * 8) {
2338 shiftAmt = sizeof(Element) * 8 - 1;
2339 destElem = 0;
2340 } else {
2341 destElem = (srcElem1 >> shiftAmt);
2342 }
2343 // Make sure the right shift sign extended when it should.
2344 if (srcElem1 < 0 && destElem >= 0) {
2345 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2346 1 - shiftAmt));
2347 }
2348 } else if (shiftAmt > 0) {
2349 bool sat = false;
2350 if (shiftAmt >= sizeof(Element) * 8) {
2351 if (srcElem1 != 0)
2352 sat = true;
2353 else
2354 destElem = 0;
2355 } else {
2356 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2357 sizeof(Element) * 8 - 1 - shiftAmt) !=
2358 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2359 sat = true;
2360 } else {
2361 destElem = srcElem1 << shiftAmt;
2362 }
2363 }
2364 if (sat) {
2365 fpscr.qc = 1;
2366 destElem = mask(sizeof(Element) * 8 - 1);
2367 if (srcElem1 < 0)
2368 destElem = ~destElem;
2369 }
2370 } else {
2371 destElem = srcElem1;
2372 }
2373 FpscrQc = fpscr;
2374 '''
2375 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2376 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2377
# Unsigned saturating rounding shift by a per-element register amount.
# The shift count is the low byte of srcElem2 reinterpreted as signed.
#  * Negative count: shift right by the magnitude and add rBit, which is
#    read from bit (count - 1) of the source when the count does not exceed
#    the element width.
#  * Positive count: shift left; if the count reaches the element width
#    with a non-zero source, or any of the top `count` bits are set, the
#    result is mask(sizeof(Element) * 8) and FPSCR.QC is set.
#  * Zero count: copy the source.  This case is handled explicitly, as in
#    vqshlSCode and vqrshlSCode, so the overflow probe is never asked for an
#    empty bit range (first bit below last bit).
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2415
# Signed saturating rounding shift by a per-element register amount.
# The shift count is the low byte of srcElem2 reinterpreted as signed.
# Negative counts shift right, patch in the sign bits by hand and then add
# rBit (bit count-1 of the source, or 1 for a negative source when the
# count exceeds the element width).  Positive counts shift left and, on
# overflow, set FPSCR.QC and saturate to mask(sizeof(Element) * 8 - 1) or
# its complement for negative sources.  A zero count copies srcElem1.
2416 vqrshlSCode = '''
2417 int16_t shiftAmt = (int8_t)srcElem2;
2418 FPSCR fpscr = (FPSCR) FpscrQc;
2419 if (shiftAmt < 0) {
2420 shiftAmt = -shiftAmt;
2421 Element rBit = 0;
2422 if (shiftAmt <= sizeof(Element) * 8)
2423 rBit = bits(srcElem1, shiftAmt - 1);
2424 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2425 rBit = 1;
2426 if (shiftAmt >= sizeof(Element) * 8) {
2427 shiftAmt = sizeof(Element) * 8 - 1;
2428 destElem = 0;
2429 } else {
2430 destElem = (srcElem1 >> shiftAmt);
2431 }
2432 // Make sure the right shift sign extended when it should.
2433 if (srcElem1 < 0 && destElem >= 0) {
2434 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2435 1 - shiftAmt));
2436 }
2437 destElem += rBit;
2438 } else if (shiftAmt > 0) {
2439 bool sat = false;
2440 if (shiftAmt >= sizeof(Element) * 8) {
2441 if (srcElem1 != 0)
2442 sat = true;
2443 else
2444 destElem = 0;
2445 } else {
2446 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2447 sizeof(Element) * 8 - 1 - shiftAmt) !=
2448 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2449 sat = true;
2450 } else {
2451 destElem = srcElem1 << shiftAmt;
2452 }
2453 }
2454 if (sat) {
2455 fpscr.qc = 1;
2456 destElem = mask(sizeof(Element) * 8 - 1);
2457 if (srcElem1 < 0)
2458 destElem = ~destElem;
2459 }
2460 } else {
2461 destElem = srcElem1;
2462 }
2463 FpscrQc = fpscr;
2464 '''
2465 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2466 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2467
# Absolute difference and accumulate: adds the larger source minus the
# smaller one to destElem.  Registered for all element types in the "D"
# and "Q" class names (the 2 / 4 argument is presumably the register
# count, and the trailing True the read-destination flag -- TODO confirm
# against threeEqualRegInst).
2468 vabaCode = '''
2469 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2470 (srcElem2 - srcElem1);
2471 '''
2472 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2473 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# Long form of absolute difference and accumulate: both sources are cast
# to BigElement before subtracting, and the difference is added to destElem.
2474 vabalCode = '''
2475 destElem += (srcElem1 > srcElem2) ?
2476 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2477 ((BigElement)srcElem2 - (BigElement)srcElem1);
2478 '''
2479 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2480
# Absolute difference: destElem is the larger source minus the smaller one.
2481 vabdCode = '''
2482 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2483 (srcElem2 - srcElem1);
2484 '''
2485 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2486 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# Long form of absolute difference: the sources are cast to BigElement
# before the larger-minus-smaller subtraction.
2487 vabdlCode = '''
2488 destElem = (srcElem1 > srcElem2) ?
2489 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2490 ((BigElement)srcElem2 - (BigElement)srcElem1);
2491 '''
2492 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2493
# Bit test: destElem is (Element)(-1) when the sources share any set bit,
# otherwise 0.
2494 vtstCode = '''
2495 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2496 '''
2497 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2498 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2499
# Element-wise multiply at the element's own width.  vmulCode is reused by
# the by-scalar instantiations further down.
2500 vmulCode = '''
2501 destElem = srcElem1 * srcElem2;
2502 '''
2503 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2504 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# Long multiply: both sources are cast to BigElement before multiplying.
2505 vmullCode = '''
2506 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2507 '''
2508 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2509
# Multiply-accumulate: adds the product of the sources to destElem.
2510 vmlaCode = '''
2511 destElem = destElem + srcElem1 * srcElem2;
2512 '''
2513 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2514 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# Long multiply-accumulate: the sources are widened to BigElement, and
# their product is added to destElem.
2515 vmlalCode = '''
2516 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2517 '''
2518 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2519
# Signed saturating doubling multiply-accumulate long.
#
# midElem is twice the product of the two narrow sources, held in
# BigElement.  The doubled product leaves BigElement's range only when both
# sources equal the most negative Element value, so that is the single case
# clamped to the largest BigElement with FPSCR.QC set -- the same condition
# vqdmullCode uses.  The check runs before the multiplication so the
# overflowing product is never evaluated.
#
# The accumulate then saturates (again setting QC) when destElem and
# midElem have the same sign and the sum's sign differs: to
# mask(sizeof(BigElement) * 8 - 1), complemented if destElem was negative.
#
# NOTE(review): instantiated for smallTypes; if that list contains unsigned
# types, numeric_limits<Element>::min() is 0 for them -- confirm the decoder
# only selects signed variants.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // 2 * min * min is the only doubled product that does not fit.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2544
# Signed saturating doubling multiply-subtract long.
#
# midElem is twice the product of the two narrow sources, held in
# BigElement.  The doubled product leaves BigElement's range only when both
# sources equal the most negative Element value, so that is the single case
# clamped to the largest BigElement with FPSCR.QC set -- the same condition
# vqdmullCode uses.  The check runs before the multiplication so the
# overflowing product is never evaluated.
#
# midElem is then subtracted from destElem.  posMid is true when -midElem
# is negative; the subtraction saturates (setting QC) when destElem's old
# sign equals that of -midElem and the result's sign differs: to
# mask(sizeof(BigElement) * 8 - 1), complemented if destElem was negative.
#
# NOTE(review): instantiated for smallTypes; if that list contains unsigned
# types, numeric_limits<Element>::min() is 0 for them -- confirm the decoder
# only selects signed variants.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // 2 * min * min is the only doubled product that does not fit.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2569
# Saturating doubling multiply long: destElem is twice the product of the
# sources.  When both sources equal numeric_limits<Element>::min() the
# result is replaced by ~((BigElement)srcElem1 << element width) and
# FPSCR.QC is set.
# NOTE(review): this registration uses op class "SimdMultAccOp" while the
# by-scalar Vqdmulls registration of the same code uses "SimdMultOp" --
# confirm which is intended.
2570 vqdmullCode = '''
2571 FPSCR fpscr = (FPSCR) FpscrQc;
2572 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2573 if (srcElem1 == srcElem2 &&
2574 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2575 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2576 fpscr.qc = 1;
2577 }
2578 FpscrQc = fpscr;
2579 '''
2580 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2581
# Multiply-subtract: subtracts the product of the sources from destElem.
2582 vmlsCode = '''
2583 destElem = destElem - srcElem1 * srcElem2;
2584 '''
2585 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2586 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# Long multiply-subtract: the sources are widened to BigElement, and their
# product is subtracted from destElem.
2587 vmlslCode = '''
2588 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2589 '''
2590 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2591
# Carry-less (XOR-accumulated) multiply: for every bit position j that
# bits(srcElem2, j) reports as set, srcElem1 << j is XORed into destElem.
# Registered under the "vmul" mnemonic with the NVmulp class names.
2592 vmulpCode = '''
2593 destElem = 0;
2594 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2595 if (bits(srcElem2, j))
2596 destElem ^= srcElem1 << j;
2597 }
2598 '''
2599 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2600 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long carry-less multiply: same XOR accumulation as vmulpCode, but
# srcElem1 is cast to BigElement before being shifted so no partial
# product is truncated to the narrow width.
2601 vmullpCode = '''
2602 destElem = 0;
2603 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2604 if (bits(srcElem2, j))
2605 destElem ^= (BigElement)srcElem1 << j;
2606 }
2607 '''
2608 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2609
# Pairwise integer maximum; reuses vmaxCode, which is defined before this
# point, with pairwise=True.
2610 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2611
# Pairwise integer minimum; reuses vminCode, which is defined before this
# point, with pairwise=True.
2612 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2613
# Saturating doubling multiply returning the high half: destElem is twice
# the 64-bit product shifted right by the element width.  When both
# sources equal numeric_limits<Element>::min() the result is replaced by
# ~srcElem1 and FPSCR.QC is set.  Also reused by the by-scalar forms.
2614 vqdmulhCode = '''
2615 FPSCR fpscr = (FPSCR) FpscrQc;
2616 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2617 (sizeof(Element) * 8);
2618 if (srcElem1 == srcElem2 &&
2619 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2620 destElem = ~srcElem1;
2621 fpscr.qc = 1;
2622 }
2623 FpscrQc = fpscr;
2624 '''
2625 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2626 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2627
# Signed saturating rounding doubling multiply returning the high half.
#
# The result is (2 * srcElem1 * srcElem2 + (1 << (width - 1))) >> width,
# computed in 64 bits.  That value leaves Element's range only when both
# sources equal the most negative Element value, so that is the single
# case that saturates: destElem becomes numeric_limits<Element>::max() and
# FPSCR.QC is set.  Every other pair, including (min / 2, min), fits and
# is returned unclamped.  The saturation test runs first so the
# overflowing 64-bit product is never evaluated.
# vqrdmulhCode is also reused by the by-scalar registrations.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // min * min doubled and rounded is one past the largest Element.
        destElem = std::numeric_limits<Element>::max();
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2651
# Floating-point maximum.  processNans() gets the first chance to produce
# destReg and reports through `done` whether it did.  If not, fpMax<float>
# is applied via binaryOp(); otherwise FPSCR.IDC is set when flushToZero()
# returns true for the sources.  FpscrExc is read at the start and written
# back at the end.  Reused by the pairwise vpmax registrations.
2652 vmaxfpCode = '''
2653 FPSCR fpscr = (FPSCR) FpscrExc;
2654 bool done;
2655 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2656 if (!done) {
2657 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2658 true, true, VfpRoundNearest);
2659 } else if (flushToZero(srcReg1, srcReg2)) {
2660 fpscr.idc = 1;
2661 }
2662 FpscrExc = fpscr;
2663 '''
2664 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2665 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2666
# Floating-point minimum.  Same structure as the maximum: processNans()
# first, then fpMin<float> through binaryOp() when it reports not done,
# otherwise FPSCR.IDC is set when flushToZero() returns true for the
# sources.  Reused by the pairwise vpmin registrations.
2667 vminfpCode = '''
2668 FPSCR fpscr = (FPSCR) FpscrExc;
2669 bool done;
2670 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2671 if (!done) {
2672 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2673 true, true, VfpRoundNearest);
2674 } else if (flushToZero(srcReg1, srcReg2)) {
2675 fpscr.idc = 1;
2676 }
2677 FpscrExc = fpscr;
2678 '''
2679 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2680 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2681
# Pairwise floating-point maximum: vmaxfpCode registered with pairwise=True.
2682 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2683 2, vmaxfpCode, pairwise=True)
2684 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2685 4, vmaxfpCode, pairwise=True)
2686
# Pairwise floating-point minimum: vminfpCode registered with pairwise=True.
2687 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2688 2, vminfpCode, pairwise=True)
2689 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2690 4, vminfpCode, pairwise=True)
2691
# Floating-point add: fpAddS applied through binaryOp() with
# VfpRoundNearest; FpscrExc is read before and written back after.
# Reused by the pairwise vpadd registrations.
2692 vaddfpCode = '''
2693 FPSCR fpscr = (FPSCR) FpscrExc;
2694 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2695 true, true, VfpRoundNearest);
2696 FpscrExc = fpscr;
2697 '''
2698 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2699 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2700
# Pairwise floating-point add: vaddfpCode registered with pairwise=True.
2701 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2702 2, vaddfpCode, pairwise=True)
2703 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2704 4, vaddfpCode, pairwise=True)
2705
# Floating-point subtract: fpSubS applied through binaryOp() with
# VfpRoundNearest; FpscrExc is read before and written back after.
2706 vsubfpCode = '''
2707 FPSCR fpscr = (FPSCR) FpscrExc;
2708 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2709 true, true, VfpRoundNearest);
2710 FpscrExc = fpscr;
2711 '''
2712 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2713 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2714
# Floating-point multiply: fpMulS applied through binaryOp() with
# VfpRoundNearest.  Reused by the by-scalar vmul registrations.
2715 vmulfpCode = '''
2716 FPSCR fpscr = (FPSCR) FpscrExc;
2717 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2718 true, true, VfpRoundNearest);
2719 FpscrExc = fpscr;
2720 '''
2721 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2722 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2723
# Floating-point multiply-accumulate done as two separate binaryOp() steps:
# the product is formed first (fpMulS) and then added to destReg (fpAddS).
# Reused by the by-scalar vmla registrations.
2724 vmlafpCode = '''
2725 FPSCR fpscr = (FPSCR) FpscrExc;
2726 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2727 true, true, VfpRoundNearest);
2728 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2729 true, true, VfpRoundNearest);
2730 FpscrExc = fpscr;
2731 '''
2732 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2733 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2734
# Fused multiply-add: a single ternaryOp() call with fpMulAdd<float> on
# (srcReg1, srcReg2, destReg), unlike the two-step vmla code.
2735 vfmafpCode = '''
2736 FPSCR fpscr = (FPSCR) FpscrExc;
2737 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2738 true, true, VfpRoundNearest);
2739 FpscrExc = fpscr;
2740 '''
2741 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2742 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2743
# Fused multiply-subtract: the same ternaryOp()/fpMulAdd<float> call as
# the fused multiply-add, with the first source negated.
2744 vfmsfpCode = '''
2745 FPSCR fpscr = (FPSCR) FpscrExc;
2746 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2747 true, true, VfpRoundNearest);
2748 FpscrExc = fpscr;
2749 '''
2750 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2751 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2752
# Floating-point multiply-subtract done as two separate binaryOp() steps:
# the product is formed first (fpMulS) and then subtracted from destReg
# (fpSubS, with destReg as the first operand).  Reused by the by-scalar
# vmls registrations.
2753 vmlsfpCode = '''
2754 FPSCR fpscr = (FPSCR) FpscrExc;
2755 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2756 true, true, VfpRoundNearest);
2757 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2758 true, true, VfpRoundNearest);
2759 FpscrExc = fpscr;
2760 '''
2761 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2762 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2763
# Floating-point compare greater-than.  vcgtFunc is evaluated through
# binaryOp(); destReg is -1 when the returned value equals 0 and 0
# otherwise, and a returned value of 2.0 sets FPSCR.IOC.  (The encoding of
# vcgtFunc's return value is defined outside this block.)  Registered with
# toInt = True.
2764 vcgtfpCode = '''
2765 FPSCR fpscr = (FPSCR) FpscrExc;
2766 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2767 true, true, VfpRoundNearest);
2768 destReg = (res == 0) ? -1 : 0;
2769 if (res == 2.0)
2770 fpscr.ioc = 1;
2771 FpscrExc = fpscr;
2772 '''
2773 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2774 2, vcgtfpCode, toInt = True)
2775 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2776 4, vcgtfpCode, toInt = True)
2777
# Floating-point compare greater-or-equal.  vcgeFunc is evaluated through
# binaryOp(); destReg is -1 when the returned value equals 0 and 0
# otherwise, and a returned value of 2.0 sets FPSCR.IOC.
2778 vcgefpCode = '''
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2781 true, true, VfpRoundNearest);
2782 destReg = (res == 0) ? -1 : 0;
2783 if (res == 2.0)
2784 fpscr.ioc = 1;
2785 FpscrExc = fpscr;
2786 '''
2787 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2788 2, vcgefpCode, toInt = True)
2789 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2790 4, vcgefpCode, toInt = True)
2791
# Floating-point "vacgt" compare.  vacgtFunc is evaluated through
# binaryOp(); destReg is -1 when the returned value equals 0 and 0
# otherwise, and a returned value of 2.0 sets FPSCR.IOC.
2792 vacgtfpCode = '''
2793 FPSCR fpscr = (FPSCR) FpscrExc;
2794 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2795 true, true, VfpRoundNearest);
2796 destReg = (res == 0) ? -1 : 0;
2797 if (res == 2.0)
2798 fpscr.ioc = 1;
2799 FpscrExc = fpscr;
2800 '''
2801 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2802 2, vacgtfpCode, toInt = True)
2803 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2804 4, vacgtfpCode, toInt = True)
2805
# Floating-point "vacge" compare.  vacgeFunc is evaluated through
# binaryOp(); destReg is -1 when the returned value equals 0 and 0
# otherwise, and a returned value of 2.0 sets FPSCR.IOC.
2806 vacgefpCode = '''
2807 FPSCR fpscr = (FPSCR) FpscrExc;
2808 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2809 true, true, VfpRoundNearest);
2810 destReg = (res == 0) ? -1 : 0;
2811 if (res == 2.0)
2812 fpscr.ioc = 1;
2813 FpscrExc = fpscr;
2814 '''
2815 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2816 2, vacgefpCode, toInt = True)
2817 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2818 4, vacgefpCode, toInt = True)
2819
# Floating-point compare equal.  vceqFunc is evaluated through binaryOp();
# destReg is -1 when the returned value equals 0 and 0 otherwise, and a
# returned value of 2.0 sets FPSCR.IOC.
2820 vceqfpCode = '''
2821 FPSCR fpscr = (FPSCR) FpscrExc;
2822 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2823 true, true, VfpRoundNearest);
2824 destReg = (res == 0) ? -1 : 0;
2825 if (res == 2.0)
2826 fpscr.ioc = 1;
2827 FpscrExc = fpscr;
2828 '''
2829 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2830 2, vceqfpCode, toInt = True)
2831 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2832 4, vceqfpCode, toInt = True)
2833
# vrecps: delegates the per-element computation to fpRecpsS through
# binaryOp() with VfpRoundNearest; FpscrExc is read and written back.
2834 vrecpsCode = '''
2835 FPSCR fpscr = (FPSCR) FpscrExc;
2836 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2837 true, true, VfpRoundNearest);
2838 FpscrExc = fpscr;
2839 '''
2840 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2841 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2842
# vrsqrts: delegates the per-element computation to fpRSqrtsS through
# binaryOp() with VfpRoundNearest; FpscrExc is read and written back.
2843 vrsqrtsCode = '''
2844 FPSCR fpscr = (FPSCR) FpscrExc;
2845 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2846 true, true, VfpRoundNearest);
2847 FpscrExc = fpscr;
2848 '''
2849 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2850 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2851
# Floating-point absolute difference: the sources are subtracted through
# binaryOp()/fpSubS, and fabs() of that difference is stored in destReg.
2852 vabdfpCode = '''
2853 FPSCR fpscr = (FPSCR) FpscrExc;
2854 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2855 true, true, VfpRoundNearest);
2856 destReg = fabs(mid);
2857 FpscrExc = fpscr;
2858 '''
2859 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2860 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2861
# Second set of vmla/vmlal registrations, built with the twoEqualRegInst /
# twoEqualRegInstFp / twoRegLongInst helpers and reusing the vmlaCode,
# vmlafpCode and vmlalCode bodies (presumably the by-scalar encodings,
# judging by the "s" suffix on the class names -- TODO confirm).
# NOTE(review): the integer forms here use unsignedTypes while the
# matching vmls forms use allTypes -- confirm this is intended.
2862 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2863 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2864 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2865 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2866 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2867
# Second set of vmls/vmlsl registrations, reusing vmlsCode, vmlsfpCode and
# vmlslCode with the two-register helper variants.
2868 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2869 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2870 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2871 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2872 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2873
# Second set of vmul/vmull registrations, reusing vmulCode, vmulfpCode and
# vmullCode with the two-register helper variants.
2874 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2875 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2876 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2877 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2878 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2879
# Second set of registrations for the saturating doubling multiplies,
# reusing vqdmullCode, vqdmlalCode, vqdmlslCode, vqdmulhCode and
# vqrdmulhCode with the two-register helper variants.
2880 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2881 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2882 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2883 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2884 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2885 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2886 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2887 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2888 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2889
# Shift right by immediate.  When imm is at least the element width the
# result is -1 if ltz(srcElem1) holds and 0 otherwise; smaller shifts use
# the plain >> operator.
2890 vshrCode = '''
2891 if (imm >= sizeof(srcElem1) * 8) {
2892 if (ltz(srcElem1))
2893 destElem = -1;
2894 else
2895 destElem = 0;
2896 } else {
2897 destElem = srcElem1 >> imm;
2898 }
2899 '''
2900 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2901 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2902
# Shift right by immediate and accumulate.
# mid is the shifted source: -1 or 0 (depending on ltz(srcElem1)) when imm
# is at least the element width, otherwise srcElem1 >> imm with the sign
# bits patched in by hand if the source was negative but the shifted value
# is not.  mid is then added to destElem.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2918
# Rounding shift right by immediate.  Shifts larger than the element width
# give 0; a zero shift copies the source.  Otherwise rBit is read from bit
# imm-1 of the source and added to the shifted value.  The shift is split
# into (imm - 1) followed by 1, so a single shift never equals the full
# element width.
2919 vrshrCode = '''
2920 if (imm > sizeof(srcElem1) * 8) {
2921 destElem = 0;
2922 } else if (imm) {
2923 Element rBit = bits(srcElem1, imm - 1);
2924 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2925 } else {
2926 destElem = srcElem1;
2927 }
2928 '''
2929 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2930 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2931
# Rounding shift right by immediate and accumulate.  Mirrors the rounding
# shift right code, but adds the shifted-and-rounded value to destElem
# instead of assigning it.  The oversized-shift branch adds 0, i.e. leaves
# destElem unchanged.
2932 vrsraCode = '''
2933 if (imm > sizeof(srcElem1) * 8) {
2934 destElem += 0;
2935 } else if (imm) {
2936 Element rBit = bits(srcElem1, imm - 1);
2937 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2938 } else {
2939 destElem += srcElem1;
2940 }
2941 '''
2942 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2943 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2944
# Shift right and insert.  The shifted source is ORed with the bits of the
# old destElem selected by ~mask(sizeof(Element) * 8 - imm).  A shift of at
# least the element width leaves destElem as it was (the self-assignment
# is a deliberate no-op).
2945 vsriCode = '''
2946 if (imm >= sizeof(Element) * 8) {
2947 destElem = destElem;
2948 } else {
2949 destElem = (srcElem1 >> imm) |
2950 (destElem & ~mask(sizeof(Element) * 8 - imm));
2951 }
2952 '''
2953 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2954 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2955
# Shift left by immediate.  For imm at or beyond the element width the
# shift is performed as (width - 1) followed by 1 rather than as a single
# shift by imm.
2956 vshlCode = '''
2957 if (imm >= sizeof(Element) * 8) {
2958 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2959 } else {
2960 destElem = srcElem1 << imm;
2961 }
2962 '''
2963 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2964 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2965
# Shift left and insert.  The shifted source is ORed with the bits of the
# old destElem selected by mask(imm).  A shift of at least the element
# width leaves destElem as it was (the self-assignment is a deliberate
# no-op).
2966 vsliCode = '''
2967 if (imm >= sizeof(Element) * 8) {
2968 destElem = destElem;
2969 } else {
2970 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2971 }
2972 '''
2973 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2974 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2975
# Signed saturating shift left by immediate.
#  * imm >= element width: any non-zero source saturates to
#    numeric_limits<Element>::min(), complemented for positive sources, and
#    sets FPSCR.QC; a zero source gives 0.
#  * 0 < imm < width: the top imm+1 bits of the source are inspected; if
#    they are neither all clear nor equal to mask(imm + 1) the shift would
#    change the sign, so the result saturates as above and QC is set.
#  * imm == 0: the source is copied.
2976 vqshlCode = '''
2977 FPSCR fpscr = (FPSCR) FpscrQc;
2978 if (imm >= sizeof(Element) * 8) {
2979 if (srcElem1 != 0) {
2980 destElem = std::numeric_limits<Element>::min();
2981 if (srcElem1 > 0)
2982 destElem = ~destElem;
2983 fpscr.qc = 1;
2984 } else {
2985 destElem = 0;
2986 }
2987 } else if (imm) {
2988 destElem = (srcElem1 << imm);
2989 uint64_t topBits = bits((uint64_t)srcElem1,
2990 sizeof(Element) * 8 - 1,
2991 sizeof(Element) * 8 - 1 - imm);
2992 if (topBits != 0 && topBits != mask(imm + 1)) {
2993 destElem = std::numeric_limits<Element>::min();
2994 if (srcElem1 > 0)
2995 destElem = ~destElem;
2996 fpscr.qc = 1;
2997 }
2998 } else {
2999 destElem = srcElem1;
3000 }
3001 FpscrQc = fpscr;
3002 '''
3003 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3004 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3005
# Unsigned saturating shift left by immediate (registered for
# unsignedTypes under the "vqshlu" mnemonic).  If imm reaches the element
# width and the source is non-zero, or any of the top imm bits of the
# source are set, the result is mask(sizeof(Element) * 8) and FPSCR.QC is
# set.  A zero imm copies the source.
3006 vqshluCode = '''
3007 FPSCR fpscr = (FPSCR) FpscrQc;
3008 if (imm >= sizeof(Element) * 8) {
3009 if (srcElem1 != 0) {
3010 destElem = mask(sizeof(Element) * 8);
3011 fpscr.qc = 1;
3012 } else {
3013 destElem = 0;
3014 }
3015 } else if (imm) {
3016 destElem = (srcElem1 << imm);
3017 uint64_t topBits = bits((uint64_t)srcElem1,
3018 sizeof(Element) * 8 - 1,
3019 sizeof(Element) * 8 - imm);
3020 if (topBits != 0) {
3021 destElem = mask(sizeof(Element) * 8);
3022 fpscr.qc = 1;
3023 }
3024 } else {
3025 destElem = srcElem1;
3026 }
3027 FpscrQc = fpscr;
3028 '''
3029 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3030 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3031
# Saturating shift left by immediate with a signed source and an
# unsigned-style result range (registered for signedTypes under the
# "vqshlus" mnemonic).  In every branch a negative source yields 0 with
# FPSCR.QC set.  A non-negative source that overflows -- imm at or beyond
# the element width with a positive source, or any of the top imm bits
# set -- yields mask(sizeof(Element) * 8) with QC set.
3032 vqshlusCode = '''
3033 FPSCR fpscr = (FPSCR) FpscrQc;
3034 if (imm >= sizeof(Element) * 8) {
3035 if (srcElem1 < 0) {
3036 destElem = 0;
3037 fpscr.qc = 1;
3038 } else if (srcElem1 > 0) {
3039 destElem = mask(sizeof(Element) * 8);
3040 fpscr.qc = 1;
3041 } else {
3042 destElem = 0;
3043 }
3044 } else if (imm) {
3045 destElem = (srcElem1 << imm);
3046 uint64_t topBits = bits((uint64_t)srcElem1,
3047 sizeof(Element) * 8 - 1,
3048 sizeof(Element) * 8 - imm);
3049 if (srcElem1 < 0) {
3050 destElem = 0;
3051 fpscr.qc = 1;
3052 } else if (topBits != 0) {
3053 destElem = mask(sizeof(Element) * 8);
3054 fpscr.qc = 1;
3055 }
3056 } else {
3057 if (srcElem1 < 0) {
3058 fpscr.qc = 1;
3059 destElem = 0;
3060 } else {
3061 destElem = srcElem1;
3062 }
3063 }
3064 FpscrQc = fpscr;
3065 '''
3066 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3067 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3068
# Shift right by immediate and narrow: the source is shifted by imm and
# assigned to destElem, or 0 is stored when imm is at least the width of
# srcElem1.
3069 vshrnCode = '''
3070 if (imm >= sizeof(srcElem1) * 8) {
3071 destElem = 0;
3072 } else {
3073 destElem = srcElem1 >> imm;
3074 }
3075 '''
3076 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3077
# Rounding shift right by immediate and narrow.  rBit is read from bit
# imm-1 of the source and added to the shifted value; the shift is split
# into (imm - 1) and 1.  Shifts larger than the width of srcElem1 give 0,
# and a zero shift copies the source.
3078 vrshrnCode = '''
3079 if (imm > sizeof(srcElem1) * 8) {
3080 destElem = 0;
3081 } else if (imm) {
3082 Element rBit = bits(srcElem1, imm - 1);
3083 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3084 } else {
3085 destElem = srcElem1;
3086 }
3087 '''
3088 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3089
# Signed saturating shift right by immediate and narrow.  The wide source
# is shifted into `mid`, and sign bits are ORed in by hand.  If mid does not
# survive a round trip through Element, the result saturates to
# mask(sizeof(Element) * 8 - 1) (complemented for negative sources) and
# FPSCR.QC is set.  Shifts larger than the source width give 0 and set QC
# unless the source is 0 or -1.  A zero shift copies the source.
3090 vqshrnCode = '''
3091 FPSCR fpscr = (FPSCR) FpscrQc;
3092 if (imm > sizeof(srcElem1) * 8) {
3093 if (srcElem1 != 0 && srcElem1 != -1)
3094 fpscr.qc = 1;
3095 destElem = 0;
3096 } else if (imm) {
3097 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3098 mid |= -(mid & ((BigElement)1 <<
3099 (sizeof(BigElement) * 8 - 1 - imm)));
3100 if (mid != (Element)mid) {
3101 destElem = mask(sizeof(Element) * 8 - 1);
3102 if (srcElem1 < 0)
3103 destElem = ~destElem;
3104 fpscr.qc = 1;
3105 } else {
3106 destElem = mid;
3107 }
3108 } else {
3109 destElem = srcElem1;
3110 }
3111 FpscrQc = fpscr;
3112 '''
3113 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3114
# Unsigned saturating shift right by immediate and narrow (registered for
# smallUnsignedTypes under the "vqshrun" mnemonic).  If the shifted value
# does not survive a round trip through Element the result is
# mask(sizeof(Element) * 8) and FPSCR.QC is set.  Shifts larger than the
# source width give 0 and set QC for a non-zero source.
# NOTE(review): the imm == 0 branch copies the source without a range
# check, unlike the rounding variant -- confirm imm == 0 cannot occur.
3115 vqshrunCode = '''
3116 FPSCR fpscr = (FPSCR) FpscrQc;
3117 if (imm > sizeof(srcElem1) * 8) {
3118 if (srcElem1 != 0)
3119 fpscr.qc = 1;
3120 destElem = 0;
3121 } else if (imm) {
3122 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3123 if (mid != (Element)mid) {
3124 destElem = mask(sizeof(Element) * 8);
3125 fpscr.qc = 1;
3126 } else {
3127 destElem = mid;
3128 }
3129 } else {
3130 destElem = srcElem1;
3131 }
3132 FpscrQc = fpscr;
3133 '''
3134 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3135 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3136
# Saturating shift right by immediate and narrow, signed source with an
# unsigned-style result range (registered for smallSignedTypes under the
# "vqshrun" mnemonic).  If any bit of the shifted value above the narrow
# element width is set, the result is 0 for negative sources and
# mask(sizeof(Element) * 8) otherwise, with FPSCR.QC set.
3137 vqshrunsCode = '''
3138 FPSCR fpscr = (FPSCR) FpscrQc;
3139 if (imm > sizeof(srcElem1) * 8) {
3140 if (srcElem1 != 0)
3141 fpscr.qc = 1;
3142 destElem = 0;
3143 } else if (imm) {
3144 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3145 if (bits(mid, sizeof(BigElement) * 8 - 1,
3146 sizeof(Element) * 8) != 0) {
3147 if (srcElem1 < 0) {
3148 destElem = 0;
3149 } else {
3150 destElem = mask(sizeof(Element) * 8);
3151 }
3152 fpscr.qc = 1;
3153 } else {
3154 destElem = mid;
3155 }
3156 } else {
3157 destElem = srcElem1;
3158 }
3159 FpscrQc = fpscr;
3160 '''
3161 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3162 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3163
# Signed saturating rounding shift right by immediate and narrow.  The
# source is shifted by imm-1, its low bit is kept as rBit, the value is
# shifted once more, sign bits are ORed in by hand and rBit is added.  If
# the result (or, for imm == 0, the source itself) does not survive a
# round trip through Element, it saturates to
# mask(sizeof(Element) * 8 - 1), complemented for negative sources, and
# FPSCR.QC is set.
3164 vqrshrnCode = '''
3165 FPSCR fpscr = (FPSCR) FpscrQc;
3166 if (imm > sizeof(srcElem1) * 8) {
3167 if (srcElem1 != 0 && srcElem1 != -1)
3168 fpscr.qc = 1;
3169 destElem = 0;
3170 } else if (imm) {
3171 BigElement mid = (srcElem1 >> (imm - 1));
3172 uint64_t rBit = mid & 0x1;
3173 mid >>= 1;
3174 mid |= -(mid & ((BigElement)1 <<
3175 (sizeof(BigElement) * 8 - 1 - imm)));
3176 mid += rBit;
3177 if (mid != (Element)mid) {
3178 destElem = mask(sizeof(Element) * 8 - 1);
3179 if (srcElem1 < 0)
3180 destElem = ~destElem;
3181 fpscr.qc = 1;
3182 } else {
3183 destElem = mid;
3184 }
3185 } else {
3186 if (srcElem1 != (Element)srcElem1) {
3187 destElem = mask(sizeof(Element) * 8 - 1);
3188 if (srcElem1 < 0)
3189 destElem = ~destElem;
3190 fpscr.qc = 1;
3191 } else {
3192 destElem = srcElem1;
3193 }
3194 }
3195 FpscrQc = fpscr;
3196 '''
3197 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3198 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3199
# Unsigned saturating rounding shift right by immediate and narrow
# (registered for smallUnsignedTypes under the "vqrshrun" mnemonic).
#  * imm > source width: the result is 0, and FPSCR.QC is set for a
#    non-zero source.
#  * imm != 0: the source is shifted by imm-1, its low bit is kept as
#    rBit, the value is shifted once more and rBit is added.
#  * imm == 0: the source is narrowed as is.
# Whenever the value does not survive a round trip through Element, the
# result is mask(sizeof(Element) * 8) and QC is set.  Both the shifted and
# the unshifted path use this same unsigned ceiling.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation: all element bits set, not width - 1.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
        "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3229
# Saturating rounding shift right by immediate and narrow, signed source
# with an unsigned-style result range (registered for smallSignedTypes
# under the "vqrshrun" mnemonic).  The source is shifted by imm-1, its low
# bit is kept as rBit, the value is shifted once more, sign bits are ORed
# in by hand and rBit is added.  If any bit above the narrow element width
# is set, the result is 0 for negative sources and
# mask(sizeof(Element) * 8) otherwise, with FPSCR.QC set.  With imm == 0 a
# negative source gives 0 with QC set, and other sources are copied.
3230 vqrshrunsCode = '''
3231 FPSCR fpscr = (FPSCR) FpscrQc;
3232 if (imm > sizeof(srcElem1) * 8) {
3233 if (srcElem1 != 0)
3234 fpscr.qc = 1;
3235 destElem = 0;
3236 } else if (imm) {
3237 BigElement mid = (srcElem1 >> (imm - 1));
3238 uint64_t rBit = mid & 0x1;
3239 mid >>= 1;
3240 mid |= -(mid & ((BigElement)1 <<
3241 (sizeof(BigElement) * 8 - 1 - imm)));
3242 mid += rBit;
3243 if (bits(mid, sizeof(BigElement) * 8 - 1,
3244 sizeof(Element) * 8) != 0) {
3245 if (srcElem1 < 0) {
3246 destElem = 0;
3247 } else {
3248 destElem = mask(sizeof(Element) * 8);
3249 }
3250 fpscr.qc = 1;
3251 } else {
3252 destElem = mid;
3253 }
3254 } else {
3255 if (srcElem1 < 0) {
3256 fpscr.qc = 1;
3257 destElem = 0;
3258 } else {
3259 destElem = srcElem1;
3260 }
3261 }
3262 FpscrQc = fpscr;
3263 '''
3264 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3265 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3266
# Shift left long by immediate: the source is cast to BigElement before
# shifting.  A shift of at least the destination width gives 0.
3267 vshllCode = '''
3268 if (imm >= sizeof(destElem) * 8) {
3269 destElem = 0;
3270 } else {
3271 destElem = (BigElement)srcElem1 << imm;
3272 }
3273 '''
3274 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3275
# Move long: a plain assignment of the source element to the destination
# element, registered through the long-shift helper.
3276 vmovlCode = '''
3277 destElem = srcElem1;
3278 '''
3279 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3280
# Float to fixed-point conversion via vfpFpToFixed<float> with its second
# argument false (presumably selecting the unsigned result -- TODO
# confirm), width 32 and imm as the last argument.  FPSCR.IDC is set when
# flushToZero() returns true for the source.  The conversion is bracketed
# by prepFpState()/finishVfp(); the empty asm statements with memory
# operands are presumably there to keep the compiler from moving the
# conversion across those calls -- TODO confirm.
3281 vcvt2ufxCode = '''
3282 FPSCR fpscr = (FPSCR) FpscrExc;
3283 if (flushToZero(srcElem1))
3284 fpscr.idc = 1;
3285 VfpSavedState state = prepFpState(VfpRoundNearest);
3286 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3287 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3288 __asm__ __volatile__("" :: "m" (destReg));
3289 finishVfp(fpscr, state, true);
3290 FpscrExc = fpscr;
3291 '''
3292 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3293 2, vcvt2ufxCode, toInt = True)
3294 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3295 4, vcvt2ufxCode, toInt = True)
3296
# Float to fixed-point conversion via vfpFpToFixed<float> with its second
# argument true (presumably selecting the signed result -- TODO confirm),
# width 32 and imm as the last argument.  FPSCR.IDC is set when
# flushToZero() returns true for the source; the conversion is bracketed
# by prepFpState()/finishVfp().
3297 vcvt2sfxCode = '''
3298 FPSCR fpscr = (FPSCR) FpscrExc;
3299 if (flushToZero(srcElem1))
3300 fpscr.idc = 1;
3301 VfpSavedState state = prepFpState(VfpRoundNearest);
3302 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3303 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3304 __asm__ __volatile__("" :: "m" (destReg));
3305 finishVfp(fpscr, state, true);
3306 FpscrExc = fpscr;
3307 '''
3308 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3309 2, vcvt2sfxCode, toInt = True)
3310 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3311 4, vcvt2sfxCode, toInt = True)
3312
# vcvt (fixed point -> float through vfpUFixedToFpS; presumably the
# unsigned source flavour -- TODO confirm). The source is read as a whole
# register (srcReg1) and the result is written per element (destElem);
# registered with fromInt = True.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
# D form covers 2 registers, Q form covers 4.
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
        2, vcvtu2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
        4, vcvtu2fpCode, fromInt = True)
3326
# vcvt (fixed point -> float through vfpSFixedToFpS; presumably the signed
# source flavour -- TODO confirm). Same structure as the vfpUFixedToFpS
# variant: whole-register source, per-element result, fromInt = True.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
# D form covers 2 registers, Q form covers 4.
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
        2, vcvts2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
        4, vcvts2fpCode, fromInt = True)
3340
# vcvt narrowing conversion: the wide source element's bits are
# reinterpreted as a float (bitsToFp), idc is set if that float is flushed,
# and vcvtFpSFpH produces the narrow destination element (uint16_t here).
# fpscr.ahp is forwarded to the conversion helper.
vcvts2hCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3357
# vcvt widening conversion: the narrow source element (uint16_t here) is
# passed to vcvtFpHFpS and the resulting float is stored in the wide
# destination element as raw bits (fpToBits). fpscr.ahp is forwarded to
# the conversion helper.
vcvth2sCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3370
# vrsqrte (integer form): each uint32_t element is replaced by the result
# of unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3376
# vrsqrte (float form): sets fpscr.idc when the input is flushed, then
# delegates to fprSqrtEstimate(), which also receives fpscr.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3386
# vrecpe (integer form): each uint32_t element is replaced by the result
# of unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3392
# vrecpe (float form): sets fpscr.idc when the input is flushed, then
# delegates to fpRecipEstimate(), which also receives fpscr.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3402
# vrev16/vrev32/vrev64: the element value is copied unchanged; only the
# index j is rewritten. groupSize is the number of elements in a 2-, 4- or
# 8-byte group ((1 << n) / sizeof(Element)), and XOR-ing i with
# groupSize - 1 mirrors the position inside that group.
# NOTE(review): j is presumably the destination index consumed by the
# twoRegMiscInst loop template -- confirm against that helper.
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev32", "NVrev32D",
        "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q",
        "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3429
# NOTE(review): split('exec') is provided by the ISA parser; presumably it
# starts a new chunk of generated exec output -- confirm. The accumulated
# vcompares / vcomparesL text (built elsewhere) is appended right after it.
split('exec')
exec_output += vcompares + vcomparesL

# vpaddl: each destination element is the sum of two adjacent source
# elements, both widened to BigElement before the add.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)

# vpadal: same widened pair sum, accumulated into the existing destination
# element; the trailing True is readDest so the old value is loaded first.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3444
# vcls: counts how many consecutive bits directly below the top bit have
# the same value as the top bit. The value is shifted left once to drop the
# top bit, then shifted and counted while the sign stays the same as it
# started; the count is capped at the element width minus one.
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3464
# vclz: counts leading zero bits. Registered for signed element types so
# that "top bit clear" can be tested as srcElem1 >= 0; the value is shifted
# left until the top bit is set or the full element width has been counted.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3475
# vcnt: counts the set bits of each element by adding the low bit and
# shifting right until the value is zero (or the element width is reached).
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3487
# vmvn: bitwise complement. Only instantiated for uint64_t, since the
# result of a bitwise NOT does not depend on how the bits are grouped.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3493
# vqabs: saturating absolute value. The most negative value cannot be
# negated, so it is replaced by its bitwise complement (the largest
# positive value) and fpscr.qc is set; other negative values are negated.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3508
# vqneg: saturating negate. The most negative value is replaced by its
# bitwise complement (the largest positive value) and fpscr.qc is set;
# every other value is simply negated.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3521
# vabs (integer form): non-saturating absolute value; negative inputs are
# negated and no status flag is touched.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs (float form): the float is viewed as a uint32_t through a union and
# masked down to its low sizeof(Element) * 8 - 1 bits, which clears the top
# bit without performing any floating-point arithmetic.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3544
# vneg (integer form): arithmetic negation of each signed element.
vnegCode = '''
    destElem = -srcElem1;
'''
twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg (float form): negation of the float register value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3555
# vcgt (two-register form): compares each signed element against zero and
# writes an all-ones element when it is greater, otherwise 0.
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3559 vcgtfpCode = '''
3560 FPSCR fpscr = (FPSCR) FpscrExc;
1807 %(destType)s destReg;
1808 %(readDest)s
1809 %(op)s
1810 %(writeDest)s
1811 }
1812 ''' % { "op" : op,
1813 "readDest" : readDestCode,
1814 "destType" : destType,
1815 "writeDest" : writeDest }
1816 for reg in range(rCount):
1817 if toInt:
1818 eWalkCode += '''
1819 FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1820 ''' % { "reg" : reg }
1821 else:
1822 eWalkCode += '''
1823 FpDestP%(reg)d = destRegs[%(reg)d];
1824 ''' % { "reg" : reg }
1825 iop = InstObjParams(name, Name,
1826 "FpRegRegOp",
1827 { "code": eWalkCode,
1828 "r_count": rCount,
1829 "predicate_test": predicateTest,
1830 "op_class": opClass }, [])
1831 header_output += NeonRegRegOpDeclare.subst(iop)
1832 exec_output += NeonEqualRegExecute.subst(iop)
1833 for type in types:
1834 substDict = { "targs" : type,
1835 "class_name" : Name }
1836 exec_output += NeonExecDeclare.subst(substDict)
1837
# Emit a two-register NEON instruction that condenses pairs of adjacent
# source elements into one wider (BigElement) destination element.
#
#   name, Name -- mnemonic and generated class name
#   opClass    -- op class string stored in the instruction parameters
#   types      -- element types to instantiate the execute method for
#   rCount     -- number of 32-bit register pieces to read and write
#   op         -- C++ snippet computing destElem from srcElem1/srcElem2
#   readDest   -- when True the old destination is loaded first, so op can
#                 accumulate into destElem
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output

    seedDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''

    chunks = [simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']

    # Load the source pieces and, interleaved with them, the destination
    # pieces when the old destination value is needed.
    for idx in range(rCount):
        chunks.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })

    # Element walk: two neighbouring source elements per destination element.
    chunks.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest })

    # Write the result back, one register piece at a time.
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    params = { "code": ''.join(chunks),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1881
# Emit a two-register NEON instruction that narrows: four source register
# pieces holding BigElement values produce two destination pieces holding
# Element values.
#
#   name, Name -- mnemonic and generated class name
#   opClass    -- op class string stored in the instruction parameters
#   types      -- element types to instantiate the execute method for
#   op         -- C++ snippet computing destElem from srcElem1
#   readDest   -- when True the old destination is loaded and handed to op
#                 in destElem
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output

    # Source is always four pieces wide.
    loadSrc = ''.join('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx } for idx in range(4))

    # The destination (two pieces) is only read when the op needs it.
    loadDest = ''
    seedDest = ''
    if readDest:
        loadDest = ''.join('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx } for idx in range(2))
        seedDest = 'destElem = gtoh(destReg.elements[i]);'

    elemWalk = '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest }

    storeDest = ''.join('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx } for idx in range(2))

    eWalkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''' + loadSrc + loadDest + elemWalk + storeDest

    params = { "code": eWalkCode,
               "r_count": 2,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1925
# Emit a NEON instruction with one register operand and an immediate: the
# op snippet produces every destination element on its own.
#
#   name, Name -- mnemonic and generated class name
#   opClass    -- op class string stored in the instruction parameters
#   types      -- element types to instantiate the execute method for
#   rCount     -- number of 32-bit register pieces to write (and read, if
#                 readDest is set)
#   op         -- C++ snippet computing destElem
#   readDest   -- when True the old destination is loaded and handed to op
#                 in destElem
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    global header_output, exec_output

    pieces = range(rCount)

    loadDest = ''
    seedDest = ''
    if readDest:
        loadDest = ''.join('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx } for idx in pieces)
        seedDest = 'destElem = gtoh(destReg.elements[i]);'

    elemWalk = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : seedDest }

    storeDest = ''.join('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx } for idx in pieces)

    eWalkCode = simdEnabledCheckCode + '''
    RegVect destReg;
    ''' + loadDest + elemWalk + storeDest

    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegImmOp", params, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1963
# Emit a two-register NEON instruction that lengthens: two source register
# pieces holding Element values produce four destination pieces holding
# BigElement values.
#
#   name, Name -- mnemonic and generated class name
#   opClass    -- op class string stored in the instruction parameters
#   types      -- element types to instantiate the execute method for
#   op         -- C++ snippet computing destElem from srcElem1
#   readDest   -- when True the old destination is loaded and handed to op
#                 in destElem
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    # The source is two pieces wide.
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    # The destination is four pieces wide and only read on request.
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element accumulator. This used to assign to destReg
        # (the whole register vector), which left destElem uninitialised;
        # the sibling helpers all assign to destElem here.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2007
# vhadd: halving add. Each input is halved separately (with bit 0 masked
# off first) so the sum cannot overflow the element; carryBit restores the
# carry that the two discarded low bits would have produced.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2020
# vrhadd: rounding halving add. Identical to vhadd except that 1 is added
# to the sum of the two low bits before it is halved, so the result rounds
# up when either low bit is set.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2033
# vhsub: halving subtract. The inputs are halved separately (bit 0 masked
# off) and subtracted; the borrow bit ("barrowBit" in the snippet) is 1
# only when the low bit of srcElem1 is 0 and that of srcElem2 is 1.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2045
# Bitwise three-register operations. Each is registered for unsignedTypes
# in a 2-register (D) and a 4-register (Q) form.

# vand: AND.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

# vbic: AND with the complement of the second operand.
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

# vorr: OR.
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

# vmov reuses the OR snippet under a different mnemonic and op class.
# NOTE(review): presumably the decoder selects these classes when both
# source registers are the same, making the OR a plain copy -- confirm.
threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

# vorn: OR with the complement of the second operand.
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

# veor: exclusive OR.
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2078
# Bitwise select operations. All three read the old destination (the
# trailing True argument) because destElem is an input as well as the
# output.

# vbif: take bits from srcElem1 where srcElem2 is 0, keep the destination
# bits where srcElem2 is 1.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: take bits from srcElem1 where srcElem2 is 1, keep the destination
# bits where srcElem2 is 0.
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination is the selector; take srcElem1 bits where it is
# 1 and srcElem2 bits where it is 0.
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2094
# vmax: element-wise maximum; srcElem2 is chosen when the inputs are equal.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

# vmin: element-wise minimum; srcElem2 is chosen when the inputs are equal.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
# vadd: element-wise add (wraps on overflow in the unsigned element type).
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

# vpadd: the same add snippet, registered with pairwise=True; only a
# 2-register form is registered here.
threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)
# vaddl / vaddw: add with both operands cast to BigElement first; the long
# and wide helpers share this snippet.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: add in BigElement and keep the part above the low
# sizeof(Element) * 8 bits.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but half of the discarded range
# (1 << (sizeof(Element) * 8 - 1)) is added first so the result is rounded.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
# vsub: element-wise subtract (wraps in the unsigned element type).
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: subtract with both operands cast to BigElement first; the
# long and wide helpers share this snippet.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
# vqadd (unsigned): saturating add. Wrap-around is detected by the sum
# being smaller than either input; the result is then forced to all ones
# and fpscr.qc is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: subtract in BigElement and keep the part above the low
# sizeof(Element) * 8 bits.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but half of the discarded range
# (1 << (sizeof(Element) * 8 - 1)) is added first so the result is rounded.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
# vqadd (signed): saturating add. Overflow is recognised when both inputs
# share a sign and the sum has the opposite one; the result is clamped to
# the element's max or min accordingly and fpscr.qc is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        if (negDest)
            /* If (>=0) plus (>=0) yields (<0), saturate to +. */
            destElem = std::numeric_limits<Element>::max();
        else
            /* If (<0) plus (<0) yields (>=0), saturate to -. */
            destElem = std::numeric_limits<Element>::min();
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2185
# vqsub (unsigned): saturating subtract. A difference larger than the
# minuend means the subtraction wrapped; the result is then clamped to 0
# and fpscr.qc is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2197
# vqsub (signed): saturating subtract. Overflow is recognised when the
# operands have opposite signs (negSrc1 == posSrc2) and the difference has
# a different sign from srcElem1; the result is clamped to the element's
# max or min accordingly and fpscr.qc is set.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        if (negDest)
            /* If (>=0) minus (<0) yields (<0), saturate to +. */
            destElem = std::numeric_limits<Element>::max();
        else
            /* If (<0) minus (>=0) yields (>=0), saturate to -. */
            destElem = std::numeric_limits<Element>::min();
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2217
# Three-register compares: the destination element is all ones when the
# comparison holds and 0 otherwise.

# vcgt: srcElem1 > srcElem2.
vcgtCode = '''
    destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

# vcge: srcElem1 >= srcElem2.
vcgeCode = '''
    destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

# vceq: srcElem1 == srcElem2 (registered for unsignedTypes only).
vceqCode = '''
    destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2235
# vshl (register shift): the shift amount is the low byte of srcElem2 read
# as a signed value. Negative amounts shift right, with the sign bits
# filled in by hand for negative inputs; an over-wide right shift gives 0
# before that fill is applied. Non-negative amounts shift left, and a
# left shift of at least the element width gives 0.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2261
# vrshl: rounding register shift. Same amount decoding as vshl; for right
# shifts rBit (the last bit shifted out, or 1 for a negative input shifted
# by more than the element width) is added to the shifted value. A shift
# amount of 0 copies the input.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2295
# vqshl (unsigned): saturating register shift. Right shifts (negative
# amounts) never saturate. Left shifts saturate to all ones and set
# fpscr.qc when any set bit would be shifted out, which includes every
# non-zero input shifted by the element width or more. A shift amount of 0
# copies the input.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2331
# vqshl (signed): saturating register shift. Right shifts sign-extend by
# hand and never saturate. Left shifts saturate when the bits shifted out,
# together with the new sign bit, are not all copies of the original sign;
# the saturated value is the largest positive element, complemented for
# negative inputs, and fpscr.qc is set. A shift amount of 0 copies the
# input.
# NOTE(review): registered with op class "SimdCmpOp", whereas the unsigned
# vqshl above uses "SimdAluOp" -- confirm which one is intended.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2377
# vqrshl (unsigned): saturating, rounding register shift. Right shifts add
# rBit, the last bit shifted out (0 once the amount exceeds the element
# width). Left shifts saturate to all ones and set fpscr.qc when a set bit
# would be shifted out.
# NOTE(review): unlike vqshlUCode, a shift amount of 0 is not special-cased
# and goes through the left-shift path, where bits() is asked for the range
# [width - 1, width]; this appears to rely on bits() returning 0 for that
# empty range -- confirm, especially for 64-bit elements.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2415
# vqrshl (signed): saturating, rounding register shift. Right shifts
# sign-extend by hand and add rBit (the last bit shifted out, or 1 for a
# negative input shifted by more than the element width). Left shifts use
# the same saturation test as the signed vqshl: saturate to the largest
# positive element, complemented for negative inputs, and set fpscr.qc.
# A shift amount of 0 copies the input.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2467
# vaba: adds the difference between the larger and the smaller of srcElem1
# and srcElem2 to destElem (absolute difference and accumulate).
2468 vabaCode = '''
2469 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2470 (srcElem2 - srcElem1);
2471 '''
# NOTE(review): the trailing True is presumably the helper's "destination is
# also read" flag, since the snippet accumulates into destElem - confirm.
2472 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2473 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: same accumulate, but both operands are cast to BigElement before the
# subtraction so the difference is formed at the wider width.
2474 vabalCode = '''
2475 destElem += (srcElem1 > srcElem2) ?
2476 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2477 ((BigElement)srcElem2 - (BigElement)srcElem1);
2478 '''
2479 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2480
# vabd: destElem receives the larger of srcElem1 / srcElem2 minus the smaller
# (absolute difference); unlike vaba nothing is accumulated.
2481 vabdCode = '''
2482 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2483 (srcElem2 - srcElem1);
2484 '''
2485 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2486 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: long form; operands are widened to BigElement before subtracting.
2487 vabdlCode = '''
2488 destElem = (srcElem1 > srcElem2) ?
2489 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2490 ((BigElement)srcElem2 - (BigElement)srcElem1);
2491 '''
2492 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2493
# vtst: destElem is all ones ((Element)(-1)) when srcElem1 and srcElem2 share
# at least one set bit, otherwise 0.
2494 vtstCode = '''
2495 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2496 '''
2497 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2498 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2499
# vmul (integer): element-wise product at Element width.
2500 vmulCode = '''
2501 destElem = srcElem1 * srcElem2;
2502 '''
2503 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2504 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: long multiply; both operands are cast to BigElement first so the
# product is computed at the wider width.
2505 vmullCode = '''
2506 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2507 '''
2508 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2509
# vmla (integer): multiply-accumulate, destElem + srcElem1 * srcElem2.
# This string is also reused by the two-register "vmla" registrations further
# down in this file.
2510 vmlaCode = '''
2511 destElem = destElem + srcElem1 * srcElem2;
2512 '''
2513 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2514 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: long multiply-accumulate; the product is formed at BigElement width.
2515 vmlalCode = '''
2516 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2517 '''
2518 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2519
# vqdmlal: saturating doubling multiply-accumulate long.
# For each element the doubled product 2 * srcElem1 * srcElem2 is formed at
# BigElement width and added to destElem.  fpscr.qc is set when either the
# doubling or the accumulation saturates.  The FPSCR value is read from
# FpscrQc on entry and written back on exit.
#
# The doubled product only overflows BigElement when both operands are the
# most negative Element (the same condition vqdmullCode tests).  The pairs
# (min, min / 2) produce 2^(2n-2), which is representable, so they must not
# be saturated.  The product is evaluated only on the non-saturating path so
# the 32-bit min * min case never overflows int64_t.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Doubling overflowed: use the largest positive BigElement.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    // Adding two values of the same sign overflowed if the sign flipped.
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2544
# vqdmlsl: saturating doubling multiply-subtract long.
# For each element the doubled product 2 * srcElem1 * srcElem2 is formed at
# BigElement width and subtracted from destElem.  fpscr.qc is set when either
# the doubling or the subtraction saturates.  The FPSCR value is read from
# FpscrQc on entry and written back on exit.
#
# The doubled product only overflows BigElement when both operands are the
# most negative Element (the same condition vqdmullCode tests).  The pairs
# (min, min / 2) produce 2^(2n-2), which is representable, so they must not
# be saturated.  The product is evaluated only on the non-saturating path so
# the 32-bit min * min case never overflows int64_t.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Doubling overflowed: use the largest positive BigElement.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    // posMid is true when the negated product is negative, i.e. when the
    // product itself is positive.
    bool posMid = ltz((BigElement)-midElem);
    // The subtraction overflowed if destElem and -midElem had the same sign
    // and the result's sign differs from it.
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2569
# vqdmull: saturating doubling multiply long.
# destElem receives 2 * srcElem1 * srcElem2.  When both operands equal the
# most negative Element, destElem is replaced by the complement of srcElem1
# shifted up by one Element width, and fpscr.qc is set.
# NOTE(review): this registration uses op class "SimdMultAccOp" although the
# snippet does not read destElem, and the two-register "vqdmull" registration
# later in this file uses "SimdMultOp" - confirm which is intended.
2570 vqdmullCode = '''
2571 FPSCR fpscr = (FPSCR) FpscrQc;
2572 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2573 if (srcElem1 == srcElem2 &&
2574 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2575 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2576 fpscr.qc = 1;
2577 }
2578 FpscrQc = fpscr;
2579 '''
2580 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2581
# vmls (integer): multiply-subtract, destElem - srcElem1 * srcElem2.
2582 vmlsCode = '''
2583 destElem = destElem - srcElem1 * srcElem2;
2584 '''
2585 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2586 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: long multiply-subtract; the product is formed at BigElement width.
2587 vmlslCode = '''
2588 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2589 '''
2590 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2591
# vmul, carry-less (XOR-accumulating) form: for every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into destElem.  Registered under the
# mnemonic "vmul" with the NVmulp* class names.
2592 vmulpCode = '''
2593 destElem = 0;
2594 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2595 if (bits(srcElem2, j))
2596 destElem ^= srcElem1 << j;
2597 }
2598 '''
2599 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2600 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long variant of the same loop: srcElem1 is widened to BigElement before the
# shift so no product bits are lost.
2601 vmullpCode = '''
2602 destElem = 0;
2603 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2604 if (bits(srcElem2, j))
2605 destElem ^= (BigElement)srcElem1 << j;
2606 }
2607 '''
2608 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2609
# Pairwise integer max / min: these reuse vmaxCode and vminCode (defined
# earlier in the file, outside this excerpt) with pairwise=True.  Only a D
# variant is registered for each.
2610 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2611
2612 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2613
# vqdmulh: saturating doubling multiply returning the high half.
# destElem receives (2 * srcElem1 * srcElem2) shifted right by one Element
# width.  When both operands equal the most negative Element, destElem is
# replaced by ~srcElem1 and fpscr.qc is set.
# This string is also reused by the two-register "vqdmulh" registrations
# later in this file.
2614 vqdmulhCode = '''
2615 FPSCR fpscr = (FPSCR) FpscrQc;
2616 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2617 (sizeof(Element) * 8);
2618 if (srcElem1 == srcElem2 &&
2619 srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2620 destElem = ~srcElem1;
2621 fpscr.qc = 1;
2622 }
2623 FpscrQc = fpscr;
2624 '''
2625 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2626 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2627
# vqrdmulh: saturating rounding doubling multiply returning the high half.
# destElem receives (2 * srcElem1 * srcElem2 + rounding constant) shifted
# right by one Element width, where the rounding constant is one half of the
# discarded low part.  The FPSCR value is read from FpscrQc on entry and
# written back on exit.
#
# The rounded high half only exceeds the Element range when both operands
# are the most negative Element; the result then saturates to the largest
# positive Element and fpscr.qc is set.  The pairs (min, min / 2) yield the
# representable value 2^(n-2) and must not be saturated.  The product is
# evaluated only on the non-saturating path so the 32-bit min * min case
# never overflows int64_t.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = std::numeric_limits<Element>::min();
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2651
# vmax (float): processNans() is called first and reports through `done`
# whether it already produced the result.  If not, binaryOp() applies
# fpMax<float>.  If it did and flushToZero(srcReg1, srcReg2) returns true,
# fpscr.idc is set.  The FPSCR value is read from FpscrExc and written back.
# This string is also reused by the pairwise "vpmax" registrations below.
2652 vmaxfpCode = '''
2653 FPSCR fpscr = (FPSCR) FpscrExc;
2654 bool done;
2655 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2656 if (!done) {
2657 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2658 true, true, VfpRoundNearest);
2659 } else if (flushToZero(srcReg1, srcReg2)) {
2660 fpscr.idc = 1;
2661 }
2662 FpscrExc = fpscr;
2663 '''
2664 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2665 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2666
# vmin (float): same structure as the float max snippet, but binaryOp() is
# given fpMin<float>.  processNans() may produce the result on its own, in
# which case a true flushToZero(srcReg1, srcReg2) sets fpscr.idc.
# This string is also reused by the pairwise "vpmin" registrations below.
2667 vminfpCode = '''
2668 FPSCR fpscr = (FPSCR) FpscrExc;
2669 bool done;
2670 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2671 if (!done) {
2672 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2673 true, true, VfpRoundNearest);
2674 } else if (flushToZero(srcReg1, srcReg2)) {
2675 fpscr.idc = 1;
2676 }
2677 FpscrExc = fpscr;
2678 '''
2679 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2680 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2681
# Pairwise float max / min: the vmaxfpCode / vminfpCode strings are registered
# again with pairwise=True, in both D and Q variants.
2682 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2683 2, vmaxfpCode, pairwise=True)
2684 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2685 4, vmaxfpCode, pairwise=True)
2686
2687 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2688 2, vminfpCode, pairwise=True)
2689 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2690 4, vminfpCode, pairwise=True)
2691
# vadd (float): destReg = binaryOp(..., fpAddS, ...) on srcReg1 and srcReg2
# with VfpRoundNearest; the FPSCR value is read from FpscrExc and written
# back afterwards.
2692 vaddfpCode = '''
2693 FPSCR fpscr = (FPSCR) FpscrExc;
2694 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2695 true, true, VfpRoundNearest);
2696 FpscrExc = fpscr;
2697 '''
2698 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2699 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2700
# Pairwise float add reuses the same snippet with pairwise=True.
2701 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2702 2, vaddfpCode, pairwise=True)
2703 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2704 4, vaddfpCode, pairwise=True)
2705
# vsub (float): destReg = binaryOp(..., fpSubS, ...) on srcReg1 and srcReg2
# with VfpRoundNearest; FPSCR is read from and written back to FpscrExc.
2706 vsubfpCode = '''
2707 FPSCR fpscr = (FPSCR) FpscrExc;
2708 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2709 true, true, VfpRoundNearest);
2710 FpscrExc = fpscr;
2711 '''
2712 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2713 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2714
# vmul (float): destReg = binaryOp(..., fpMulS, ...) on srcReg1 and srcReg2
# with VfpRoundNearest; FPSCR is read from and written back to FpscrExc.
# This string is also reused by the two-register float "vmul" registrations
# later in this file.
2715 vmulfpCode = '''
2716 FPSCR fpscr = (FPSCR) FpscrExc;
2717 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2718 true, true, VfpRoundNearest);
2719 FpscrExc = fpscr;
2720 '''
2721 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2722 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2723
# vmla (float): two separate binaryOp() steps - first the product of srcReg1
# and srcReg2 (fpMulS) into `mid`, then mid + destReg (fpAddS).  Both steps
# share the same fpscr copy, which is written back to FpscrExc at the end.
2724 vmlafpCode = '''
2725 FPSCR fpscr = (FPSCR) FpscrExc;
2726 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2727 true, true, VfpRoundNearest);
2728 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2729 true, true, VfpRoundNearest);
2730 FpscrExc = fpscr;
2731 '''
2732 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2733 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2734
# vfma (float): a single ternaryOp() call with fpMulAdd<float> on srcReg1,
# srcReg2 and destReg, in contrast to the two-step vmla float snippet.
2735 vfmafpCode = '''
2736 FPSCR fpscr = (FPSCR) FpscrExc;
2737 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2738 true, true, VfpRoundNearest);
2739 FpscrExc = fpscr;
2740 '''
2741 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2742 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2743
# vfms (float): identical to vfma except that srcReg1 is negated before being
# passed to ternaryOp().
2744 vfmsfpCode = '''
2745 FPSCR fpscr = (FPSCR) FpscrExc;
2746 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2747 true, true, VfpRoundNearest);
2748 FpscrExc = fpscr;
2749 '''
2750 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2751 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2752
# vmls (float): two separate binaryOp() steps - the product of srcReg1 and
# srcReg2 (fpMulS) into `mid`, then destReg - mid (fpSubS).
2753 vmlsfpCode = '''
2754 FPSCR fpscr = (FPSCR) FpscrExc;
2755 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2756 true, true, VfpRoundNearest);
2757 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2758 true, true, VfpRoundNearest);
2759 FpscrExc = fpscr;
2760 '''
2761 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2762 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2763
# vcgt (float): binaryOp() evaluates vcgtFunc on srcReg1 / srcReg2 and returns
# a float code in `res`.  res == 0 stores -1 in destReg, any other value
# stores 0; res == 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the codes returned by vcgtFunc is defined
# outside this excerpt - confirm there.
2764 vcgtfpCode = '''
2765 FPSCR fpscr = (FPSCR) FpscrExc;
2766 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2767 true, true, VfpRoundNearest);
2768 destReg = (res == 0) ? -1 : 0;
2769 if (res == 2.0)
2770 fpscr.ioc = 1;
2771 FpscrExc = fpscr;
2772 '''
2773 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2774 2, vcgtfpCode, toInt = True)
2775 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2776 4, vcgtfpCode, toInt = True)
2777
# vcge (float): binaryOp() evaluates vcgeFunc on srcReg1 / srcReg2.  A result
# of 0 stores -1 in destReg, anything else stores 0; a result of 2.0 also
# sets fpscr.ioc.
2778 vcgefpCode = '''
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2781 true, true, VfpRoundNearest);
2782 destReg = (res == 0) ? -1 : 0;
2783 if (res == 2.0)
2784 fpscr.ioc = 1;
2785 FpscrExc = fpscr;
2786 '''
2787 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2788 2, vcgefpCode, toInt = True)
2789 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2790 4, vcgefpCode, toInt = True)
2791
# vacgt (float): binaryOp() evaluates vacgtFunc on srcReg1 / srcReg2.  A
# result of 0 stores -1 in destReg, anything else stores 0; a result of 2.0
# also sets fpscr.ioc.
2792 vacgtfpCode = '''
2793 FPSCR fpscr = (FPSCR) FpscrExc;
2794 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2795 true, true, VfpRoundNearest);
2796 destReg = (res == 0) ? -1 : 0;
2797 if (res == 2.0)
2798 fpscr.ioc = 1;
2799 FpscrExc = fpscr;
2800 '''
2801 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2802 2, vacgtfpCode, toInt = True)
2803 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2804 4, vacgtfpCode, toInt = True)
2805
# vacge (float): binaryOp() evaluates vacgeFunc on srcReg1 / srcReg2.  A
# result of 0 stores -1 in destReg, anything else stores 0; a result of 2.0
# also sets fpscr.ioc.
2806 vacgefpCode = '''
2807 FPSCR fpscr = (FPSCR) FpscrExc;
2808 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2809 true, true, VfpRoundNearest);
2810 destReg = (res == 0) ? -1 : 0;
2811 if (res == 2.0)
2812 fpscr.ioc = 1;
2813 FpscrExc = fpscr;
2814 '''
2815 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2816 2, vacgefpCode, toInt = True)
2817 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2818 4, vacgefpCode, toInt = True)
2819
# vceq (float): binaryOp() evaluates vceqFunc on srcReg1 / srcReg2.  A result
# of 0 stores -1 in destReg, anything else stores 0; a result of 2.0 also
# sets fpscr.ioc.
2820 vceqfpCode = '''
2821 FPSCR fpscr = (FPSCR) FpscrExc;
2822 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2823 true, true, VfpRoundNearest);
2824 destReg = (res == 0) ? -1 : 0;
2825 if (res == 2.0)
2826 fpscr.ioc = 1;
2827 FpscrExc = fpscr;
2828 '''
2829 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2830 2, vceqfpCode, toInt = True)
2831 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2832 4, vceqfpCode, toInt = True)
2833
# vrecps (float): destReg = binaryOp(..., fpRecpsS, ...) on srcReg1 and
# srcReg2; FPSCR is read from and written back to FpscrExc.
2834 vrecpsCode = '''
2835 FPSCR fpscr = (FPSCR) FpscrExc;
2836 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2837 true, true, VfpRoundNearest);
2838 FpscrExc = fpscr;
2839 '''
2840 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2841 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2842
# vrsqrts (float): same shape, with fpRSqrtsS as the operation.
2843 vrsqrtsCode = '''
2844 FPSCR fpscr = (FPSCR) FpscrExc;
2845 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2846 true, true, VfpRoundNearest);
2847 FpscrExc = fpscr;
2848 '''
2849 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2850 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2851
# vabd (float): srcReg1 - srcReg2 is computed through binaryOp() with fpSubS,
# then fabs() of that difference is stored in destReg.
2852 vabdfpCode = '''
2853 FPSCR fpscr = (FPSCR) FpscrExc;
2854 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2855 true, true, VfpRoundNearest);
2856 destReg = fabs(mid);
2857 FpscrExc = fpscr;
2858 '''
2859 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2860 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2861
# Two-register registrations that reuse the code strings defined for the
# three-register forms above (vmlaCode, vmlafpCode, vmlalCode, ...).  No new
# per-element code is introduced in this group.
# NOTE(review): the extra "s" in the class names (VmlasD, Vmulls, ...)
# presumably marks the by-scalar encodings - confirm against the decoder.
# NOTE(review): integer vmla is registered for unsignedTypes while integer
# vmls and vmul use allTypes - confirm the asymmetry is intentional.
2862 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2863 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2864 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2865 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2866 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2867
2868 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2869 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2870 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2871 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2872 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2873
2874 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2875 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2876 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2877 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2878 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2879
# Saturating doubling multiplies, again reusing the strings defined above.
2880 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2881 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2882 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2883 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2884 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2885 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2886 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2887 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2888 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2889
# vshr: right shift by the immediate `imm`.  A shift of at least the element
# width cannot be expressed with the C++ shift operator, so it is handled
# explicitly: the result is -1 when ltz(srcElem1) is true and 0 otherwise.
2890 vshrCode = '''
2891 if (imm >= sizeof(srcElem1) * 8) {
2892 if (ltz(srcElem1))
2893 destElem = -1;
2894 else
2895 destElem = 0;
2896 } else {
2897 destElem = srcElem1 >> imm;
2898 }
2899 '''
2900 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2901 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2902
# vsra: shift right by `imm` and accumulate into destElem.
# `mid` is the shifted value: -1 or 0 for shifts of at least the element
# width (depending on ltz(srcElem1)), otherwise srcElem1 >> imm with an
# explicit sign-extension fix-up when the source is negative but the shifted
# value is not.
# NOTE(review): the declaration of `mid` carries a stray second semicolon (an
# empty statement); harmless, but could be cleaned up.
2903 vsraCode = '''
2904 Element mid;;
2905 if (imm >= sizeof(srcElem1) * 8) {
2906 mid = ltz(srcElem1) ? -1 : 0;
2907 } else {
2908 mid = srcElem1 >> imm;
2909 if (ltz(srcElem1) && !ltz(mid)) {
2910 mid |= -(mid & ((Element)1 <<
2911 (sizeof(Element) * 8 - 1 - imm)));
2912 }
2913 }
2914 destElem += mid;
2915 '''
2916 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2917 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2918
# vrshr: rounding right shift by `imm`.
#  - imm greater than the element width: result is 0.
#  - non-zero imm: rBit is bit (imm - 1) of srcElem1, i.e. the last bit
#    shifted out.  The shift is split into (imm - 1) followed by 1 so that a
#    shift equal to the element width stays within the operator's valid
#    range, and rBit is added to the shifted value.
#  - imm == 0: srcElem1 is copied unchanged.
2919 vrshrCode = '''
2920 if (imm > sizeof(srcElem1) * 8) {
2921 destElem = 0;
2922 } else if (imm) {
2923 Element rBit = bits(srcElem1, imm - 1);
2924 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2925 } else {
2926 destElem = srcElem1;
2927 }
2928 '''
2929 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2930 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2931
# vrsra: same three cases as vrshr, but the rounded value is added to
# destElem instead of replacing it (adding 0 in the over-wide case).
2932 vrsraCode = '''
2933 if (imm > sizeof(srcElem1) * 8) {
2934 destElem += 0;
2935 } else if (imm) {
2936 Element rBit = bits(srcElem1, imm - 1);
2937 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2938 } else {
2939 destElem += srcElem1;
2940 }
2941 '''
2942 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2943 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2944
# vsri: shift right and insert.  srcElem1 >> imm supplies the low bits of the
# result, while the top `imm` bits of the previous destElem are preserved by
# masking with ~mask(width - imm).  A shift of at least the element width
# leaves destElem unchanged (written as a self-assignment).
2945 vsriCode = '''
2946 if (imm >= sizeof(Element) * 8) {
2947 destElem = destElem;
2948 } else {
2949 destElem = (srcElem1 >> imm) |
2950 (destElem & ~mask(sizeof(Element) * 8 - imm));
2951 }
2952 '''
2953 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2954 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2955
# vshl (immediate): left shift by `imm`.  For shifts of at least the element
# width the shift is performed as (width - 1) followed by 1, keeping each
# individual shift amount below the element width.
2956 vshlCode = '''
2957 if (imm >= sizeof(Element) * 8) {
2958 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2959 } else {
2960 destElem = srcElem1 << imm;
2961 }
2962 '''
2963 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2964 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2965
# vsli: shift left and insert.  srcElem1 << imm supplies the high bits of the
# result, while the low `imm` bits of the previous destElem are preserved via
# mask(imm).  A shift of at least the element width leaves destElem unchanged
# (written as a self-assignment).
2966 vsliCode = '''
2967 if (imm >= sizeof(Element) * 8) {
2968 destElem = destElem;
2969 } else {
2970 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2971 }
2972 '''
2973 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2974 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2975
# vqshl (immediate, signed elements): saturating left shift by `imm`.
#  - imm at least the element width: any non-zero source saturates to the
#    most negative Element, complemented to the most positive one for a
#    positive source, and fpscr.qc is set; a zero source gives 0.
#  - other non-zero imm: topBits holds the (imm + 1) uppermost source bits.
#    Unless they are all zeros or all ones the shift would change the sign or
#    lose magnitude, so the result saturates as above.
#  - imm == 0: srcElem1 is copied unchanged.
2976 vqshlCode = '''
2977 FPSCR fpscr = (FPSCR) FpscrQc;
2978 if (imm >= sizeof(Element) * 8) {
2979 if (srcElem1 != 0) {
2980 destElem = std::numeric_limits<Element>::min();
2981 if (srcElem1 > 0)
2982 destElem = ~destElem;
2983 fpscr.qc = 1;
2984 } else {
2985 destElem = 0;
2986 }
2987 } else if (imm) {
2988 destElem = (srcElem1 << imm);
2989 uint64_t topBits = bits((uint64_t)srcElem1,
2990 sizeof(Element) * 8 - 1,
2991 sizeof(Element) * 8 - 1 - imm);
2992 if (topBits != 0 && topBits != mask(imm + 1)) {
2993 destElem = std::numeric_limits<Element>::min();
2994 if (srcElem1 > 0)
2995 destElem = ~destElem;
2996 fpscr.qc = 1;
2997 }
2998 } else {
2999 destElem = srcElem1;
3000 }
3001 FpscrQc = fpscr;
3002 '''
3003 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3004 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3005
# Unsigned saturating left shift by `imm`; registered under the mnemonic
# "vqshlu" for unsignedTypes.
#  - imm at least the element width: a non-zero source saturates to all ones
#    and sets fpscr.qc; a zero source gives 0.
#  - other non-zero imm: topBits holds the `imm` uppermost source bits; if any
#    of them is set the result saturates to all ones and fpscr.qc is set.
#  - imm == 0: srcElem1 is copied unchanged.
3006 vqshluCode = '''
3007 FPSCR fpscr = (FPSCR) FpscrQc;
3008 if (imm >= sizeof(Element) * 8) {
3009 if (srcElem1 != 0) {
3010 destElem = mask(sizeof(Element) * 8);
3011 fpscr.qc = 1;
3012 } else {
3013 destElem = 0;
3014 }
3015 } else if (imm) {
3016 destElem = (srcElem1 << imm);
3017 uint64_t topBits = bits((uint64_t)srcElem1,
3018 sizeof(Element) * 8 - 1,
3019 sizeof(Element) * 8 - imm);
3020 if (topBits != 0) {
3021 destElem = mask(sizeof(Element) * 8);
3022 fpscr.qc = 1;
3023 }
3024 } else {
3025 destElem = srcElem1;
3026 }
3027 FpscrQc = fpscr;
3028 '''
3029 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3030 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3031
# Saturating left shift of signed sources to an unsigned range; registered
# under the mnemonic "vqshlus" for signedTypes.
# In every branch a negative source produces 0 with fpscr.qc set.  For
# non-negative sources:
#  - imm at least the element width: a positive source saturates to all ones
#    with fpscr.qc set; zero gives 0.
#  - other non-zero imm: if any of the `imm` uppermost source bits is set the
#    result saturates to all ones with fpscr.qc set, otherwise it is
#    srcElem1 << imm.
#  - imm == 0: srcElem1 is copied unchanged.
3032 vqshlusCode = '''
3033 FPSCR fpscr = (FPSCR) FpscrQc;
3034 if (imm >= sizeof(Element) * 8) {
3035 if (srcElem1 < 0) {
3036 destElem = 0;
3037 fpscr.qc = 1;
3038 } else if (srcElem1 > 0) {
3039 destElem = mask(sizeof(Element) * 8);
3040 fpscr.qc = 1;
3041 } else {
3042 destElem = 0;
3043 }
3044 } else if (imm) {
3045 destElem = (srcElem1 << imm);
3046 uint64_t topBits = bits((uint64_t)srcElem1,
3047 sizeof(Element) * 8 - 1,
3048 sizeof(Element) * 8 - imm);
3049 if (srcElem1 < 0) {
3050 destElem = 0;
3051 fpscr.qc = 1;
3052 } else if (topBits != 0) {
3053 destElem = mask(sizeof(Element) * 8);
3054 fpscr.qc = 1;
3055 }
3056 } else {
3057 if (srcElem1 < 0) {
3058 fpscr.qc = 1;
3059 destElem = 0;
3060 } else {
3061 destElem = srcElem1;
3062 }
3063 }
3064 FpscrQc = fpscr;
3065 '''
3066 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3067 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3068
# vshrn: narrowing right shift by `imm`; a shift of at least the width of
# srcElem1 yields 0.
# NOTE(review): srcElem1 is presumably the wide (BigElement) source of the
# narrowing helper, with destElem the narrow result - confirm against
# twoRegNarrowShiftInst.
3069 vshrnCode = '''
3070 if (imm >= sizeof(srcElem1) * 8) {
3071 destElem = 0;
3072 } else {
3073 destElem = srcElem1 >> imm;
3074 }
3075 '''
3076 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3077
# vrshrn: rounding narrowing right shift.  rBit is bit (imm - 1) of the
# source and is added after shifting by (imm - 1) and then 1; imm == 0 copies
# the source and an over-wide imm yields 0.
3078 vrshrnCode = '''
3079 if (imm > sizeof(srcElem1) * 8) {
3080 destElem = 0;
3081 } else if (imm) {
3082 Element rBit = bits(srcElem1, imm - 1);
3083 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3084 } else {
3085 destElem = srcElem1;
3086 }
3087 '''
3088 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3089
# vqshrn (signed): saturating narrowing right shift by `imm`.
#  - imm greater than the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - non-zero imm: the source is shifted into `mid` (as (imm - 1) then 1) and
#    sign-extended by hand.  If `mid` does not survive a round trip through
#    Element, the result saturates to mask(width - 1), complemented for a
#    negative source, and fpscr.qc is set.
#  - imm == 0: the source is assigned to destElem directly.
# NOTE(review): unlike the rounding variant vqrshrnCode, the imm == 0 path
# performs no saturation check - confirm imm == 0 cannot reach this code.
3090 vqshrnCode = '''
3091 FPSCR fpscr = (FPSCR) FpscrQc;
3092 if (imm > sizeof(srcElem1) * 8) {
3093 if (srcElem1 != 0 && srcElem1 != -1)
3094 fpscr.qc = 1;
3095 destElem = 0;
3096 } else if (imm) {
3097 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3098 mid |= -(mid & ((BigElement)1 <<
3099 (sizeof(BigElement) * 8 - 1 - imm)));
3100 if (mid != (Element)mid) {
3101 destElem = mask(sizeof(Element) * 8 - 1);
3102 if (srcElem1 < 0)
3103 destElem = ~destElem;
3104 fpscr.qc = 1;
3105 } else {
3106 destElem = mid;
3107 }
3108 } else {
3109 destElem = srcElem1;
3110 }
3111 FpscrQc = fpscr;
3112 '''
3113 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3114
# Unsigned saturating narrowing right shift by `imm`; registered under the
# mnemonic "vqshrun" for smallUnsignedTypes.
#  - imm greater than the source width: result 0; fpscr.qc is set for a
#    non-zero source.
#  - non-zero imm: if the shifted value `mid` does not fit in Element the
#    result saturates to all ones and fpscr.qc is set.
#  - imm == 0: the source is assigned to destElem directly.
# NOTE(review): the imm == 0 path performs no saturation check, unlike
# vqrshrunCode - confirm imm == 0 cannot reach this code.
3115 vqshrunCode = '''
3116 FPSCR fpscr = (FPSCR) FpscrQc;
3117 if (imm > sizeof(srcElem1) * 8) {
3118 if (srcElem1 != 0)
3119 fpscr.qc = 1;
3120 destElem = 0;
3121 } else if (imm) {
3122 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3123 if (mid != (Element)mid) {
3124 destElem = mask(sizeof(Element) * 8);
3125 fpscr.qc = 1;
3126 } else {
3127 destElem = mid;
3128 }
3129 } else {
3130 destElem = srcElem1;
3131 }
3132 FpscrQc = fpscr;
3133 '''
3134 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3135 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3136
# Saturating narrowing right shift of signed sources to an unsigned range;
# registered under the mnemonic "vqshrun" for smallSignedTypes.
#  - imm greater than the source width: result 0; fpscr.qc is set for a
#    non-zero source.
#  - non-zero imm: if any bit of the shifted value `mid` above the Element
#    width is set, the result saturates to 0 for a negative source or to all
#    ones otherwise, and fpscr.qc is set.
#  - imm == 0: the source is assigned to destElem directly.
3137 vqshrunsCode = '''
3138 FPSCR fpscr = (FPSCR) FpscrQc;
3139 if (imm > sizeof(srcElem1) * 8) {
3140 if (srcElem1 != 0)
3141 fpscr.qc = 1;
3142 destElem = 0;
3143 } else if (imm) {
3144 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3145 if (bits(mid, sizeof(BigElement) * 8 - 1,
3146 sizeof(Element) * 8) != 0) {
3147 if (srcElem1 < 0) {
3148 destElem = 0;
3149 } else {
3150 destElem = mask(sizeof(Element) * 8);
3151 }
3152 fpscr.qc = 1;
3153 } else {
3154 destElem = mid;
3155 }
3156 } else {
3157 destElem = srcElem1;
3158 }
3159 FpscrQc = fpscr;
3160 '''
3161 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3162 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3163
# vqrshrn (signed): saturating rounding narrowing right shift by `imm`.
#  - imm greater than the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - non-zero imm: the source is shifted by (imm - 1); its lowest bit is kept
#    as rBit, the value is shifted one more position, sign-extended by hand,
#    and rBit is added.  If the sum does not fit in Element, the result
#    saturates to mask(width - 1), complemented for a negative source, and
#    fpscr.qc is set.
#  - imm == 0: the source itself is range-checked and saturated the same way.
3164 vqrshrnCode = '''
3165 FPSCR fpscr = (FPSCR) FpscrQc;
3166 if (imm > sizeof(srcElem1) * 8) {
3167 if (srcElem1 != 0 && srcElem1 != -1)
3168 fpscr.qc = 1;
3169 destElem = 0;
3170 } else if (imm) {
3171 BigElement mid = (srcElem1 >> (imm - 1));
3172 uint64_t rBit = mid & 0x1;
3173 mid >>= 1;
3174 mid |= -(mid & ((BigElement)1 <<
3175 (sizeof(BigElement) * 8 - 1 - imm)));
3176 mid += rBit;
3177 if (mid != (Element)mid) {
3178 destElem = mask(sizeof(Element) * 8 - 1);
3179 if (srcElem1 < 0)
3180 destElem = ~destElem;
3181 fpscr.qc = 1;
3182 } else {
3183 destElem = mid;
3184 }
3185 } else {
3186 if (srcElem1 != (Element)srcElem1) {
3187 destElem = mask(sizeof(Element) * 8 - 1);
3188 if (srcElem1 < 0)
3189 destElem = ~destElem;
3190 fpscr.qc = 1;
3191 } else {
3192 destElem = srcElem1;
3193 }
3194 }
3195 FpscrQc = fpscr;
3196 '''
3197 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3198 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3199
# Unsigned saturating rounding narrowing right shift by `imm`; registered
# under the mnemonic "vqrshrun" for smallUnsignedTypes.
#  - imm greater than the source width: result 0; fpscr.qc is set for a
#    non-zero source.
#  - non-zero imm: the source is shifted by (imm - 1); its lowest bit is kept
#    as rBit, the value is shifted one more position and rBit is added.  If
#    the sum does not fit in Element the result saturates and fpscr.qc is set.
#  - imm == 0: the source itself is range-checked and saturated.
# Both saturating paths use the all-ones value mask(width): the elements are
# unsigned, so mask(width - 1) would only be half of the representable range.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation: largest Element value, same as above.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
        "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3229
# Saturating rounding narrowing right shift of signed sources to an unsigned
# range; registered under the mnemonic "vqrshrun" for smallSignedTypes.
#  - imm greater than the source width: result 0; fpscr.qc is set for a
#    non-zero source.
#  - non-zero imm: the source is shifted by (imm - 1); its lowest bit is kept
#    as rBit, the value is shifted one more position, sign-extended by hand,
#    and rBit is added.  If any bit above the Element width is then set, the
#    result saturates to 0 for a negative source or all ones otherwise, and
#    fpscr.qc is set.
#  - imm == 0: a negative source gives 0 with fpscr.qc set; otherwise the
#    source is assigned to destElem directly.
3230 vqrshrunsCode = '''
3231 FPSCR fpscr = (FPSCR) FpscrQc;
3232 if (imm > sizeof(srcElem1) * 8) {
3233 if (srcElem1 != 0)
3234 fpscr.qc = 1;
3235 destElem = 0;
3236 } else if (imm) {
3237 BigElement mid = (srcElem1 >> (imm - 1));
3238 uint64_t rBit = mid & 0x1;
3239 mid >>= 1;
3240 mid |= -(mid & ((BigElement)1 <<
3241 (sizeof(BigElement) * 8 - 1 - imm)));
3242 mid += rBit;
3243 if (bits(mid, sizeof(BigElement) * 8 - 1,
3244 sizeof(Element) * 8) != 0) {
3245 if (srcElem1 < 0) {
3246 destElem = 0;
3247 } else {
3248 destElem = mask(sizeof(Element) * 8);
3249 }
3250 fpscr.qc = 1;
3251 } else {
3252 destElem = mid;
3253 }
3254 } else {
3255 if (srcElem1 < 0) {
3256 fpscr.qc = 1;
3257 destElem = 0;
3258 } else {
3259 destElem = srcElem1;
3260 }
3261 }
3262 FpscrQc = fpscr;
3263 '''
3264 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3265 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3266
# vshll: widening left shift.  The source is cast to BigElement before the
# shift; a shift of at least the destination width yields 0.
3267 vshllCode = '''
3268 if (imm >= sizeof(destElem) * 8) {
3269 destElem = 0;
3270 } else {
3271 destElem = (BigElement)srcElem1 << imm;
3272 }
3273 '''
3274 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3275
# vmovl: plain widening copy, registered through the same long-shift helper
# but with op class "SimdMiscOp".
3276 vmovlCode = '''
3277 destElem = srcElem1;
3278 '''
3279 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3280
# vcvt, float to fixed point: vfpFpToFixed<float>() is called with a false
# second argument here and a true one in the snippet below, and with width 32
# and `imm`.
# NOTE(review): by the names (2ufx / 2sfx) that second argument presumably
# selects unsigned versus signed conversion - confirm in vfpFpToFixed.
# fpscr.idc is set beforehand when flushToZero(srcElem1) returns true.  The
# conversion is bracketed by prepFpState() / finishVfp().
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers that keep the conversion between those two calls -
# confirm.
3281 vcvt2ufxCode = '''
3282 FPSCR fpscr = (FPSCR) FpscrExc;
3283 if (flushToZero(srcElem1))
3284 fpscr.idc = 1;
3285 VfpSavedState state = prepFpState(VfpRoundNearest);
3286 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3287 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3288 __asm__ __volatile__("" :: "m" (destReg));
3289 finishVfp(fpscr, state, true);
3290 FpscrExc = fpscr;
3291 '''
3292 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3293 2, vcvt2ufxCode, toInt = True)
3294 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3295 4, vcvt2ufxCode, toInt = True)
3296
# Same sequence with the second vfpFpToFixed argument set to true.
3297 vcvt2sfxCode = '''
3298 FPSCR fpscr = (FPSCR) FpscrExc;
3299 if (flushToZero(srcElem1))
3300 fpscr.idc = 1;
3301 VfpSavedState state = prepFpState(VfpRoundNearest);
3302 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3303 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3304 __asm__ __volatile__("" :: "m" (destReg));
3305 finishVfp(fpscr, state, true);
3306 FpscrExc = fpscr;
3307 '''
3308 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3309 2, vcvt2sfxCode, toInt = True)
3310 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3311 4, vcvt2sfxCode, toInt = True)
3312
# vcvt, fixed point to float: the unsigned variant calls vfpUFixedToFpS() and
# the signed variant below calls vfpSFixedToFpS(), both with srcReg1, width 32
# and `imm`.  The conversion is bracketed by prepFpState() / finishVfp().
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers around the conversion - confirm.
3313 vcvtu2fpCode = '''
3314 FPSCR fpscr = (FPSCR) FpscrExc;
3315 VfpSavedState state = prepFpState(VfpRoundNearest);
3316 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3317 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3318 __asm__ __volatile__("" :: "m" (destElem));
3319 finishVfp(fpscr, state, true);
3320 FpscrExc = fpscr;
3321 '''
3322 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3323 2, vcvtu2fpCode, fromInt = True)
3324 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3325 4, vcvtu2fpCode, fromInt = True)
3326
# Signed counterpart; only the conversion routine differs.
3327 vcvts2fpCode = '''
3328 FPSCR fpscr = (FPSCR) FpscrExc;
3329 VfpSavedState state = prepFpState(VfpRoundNearest);
3330 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3331 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3332 __asm__ __volatile__("" :: "m" (destElem));
3333 finishVfp(fpscr, state, true);
3334 FpscrExc = fpscr;
3335 '''
3336 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3337 2, vcvts2fpCode, fromInt = True)
3338 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3339 4, vcvts2fpCode, fromInt = True)
3340
# vcvt, single to half precision (narrowing): the source bits are
# reinterpreted as a float via bitsToFp(), fpscr.idc is set when
# flushToZero() returns true for it, and vcvtFpSFpH() produces the result
# using fpscr.ahp.  destElem is zeroed first and the conversion is bracketed
# by prepFpState() / finishVfp().
3341 vcvts2hCode = '''
3342 destElem = 0;
3343 FPSCR fpscr = (FPSCR) FpscrExc;
3344 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3345 if (flushToZero(srcFp1))
3346 fpscr.idc = 1;
3347 VfpSavedState state = prepFpState(VfpRoundNearest);
3348 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3349 : "m" (srcFp1), "m" (destElem));
3350 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3351 fpscr.ahp, srcFp1);
3352 __asm__ __volatile__("" :: "m" (destElem));
3353 finishVfp(fpscr, state, true);
3354 FpscrExc = fpscr;
3355 '''
3356 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3357
# vcvt, half to single precision (widening): vcvtFpHFpS() converts srcElem1
# using fpscr.ahp and the float result is stored as raw bits via fpToBits().
3358 vcvth2sCode = '''
3359 destElem = 0;
3360 FPSCR fpscr = (FPSCR) FpscrExc;
3361 VfpSavedState state = prepFpState(VfpRoundNearest);
3362 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3363 : "m" (srcElem1), "m" (destElem));
3364 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3365 __asm__ __volatile__("" :: "m" (destElem));
3366 finishVfp(fpscr, state, true);
3367 FpscrExc = fpscr;
3368 '''
3369 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3370
# vrsqrte (integer): delegates to unsignedRSqrtEstimate(); registered for
# uint32_t elements only.
3371 vrsqrteCode = '''
3372 destElem = unsignedRSqrtEstimate(srcElem1);
3373 '''
3374 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3375 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3376
# vrsqrte (float): sets fpscr.idc when flushToZero(srcReg1) returns true, then
# delegates to fprSqrtEstimate().
3377 vrsqrtefpCode = '''
3378 FPSCR fpscr = (FPSCR) FpscrExc;
3379 if (flushToZero(srcReg1))
3380 fpscr.idc = 1;
3381 destReg = fprSqrtEstimate(fpscr, srcReg1);
3382 FpscrExc = fpscr;
3383 '''
3384 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3385 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3386
# vrecpe (integer): delegates to unsignedRecipEstimate(); registered for
# uint32_t elements only.
3387 vrecpeCode = '''
3388 destElem = unsignedRecipEstimate(srcElem1);
3389 '''
3390 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3391 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3392
# vrecpe (float): sets fpscr.idc when flushToZero(srcReg1) returns true, then
# delegates to fpRecipEstimate().
3393 vrecpefpCode = '''
3394 FPSCR fpscr = (FPSCR) FpscrExc;
3395 if (flushToZero(srcReg1))
3396 fpscr.idc = 1;
3397 destReg = fpRecipEstimate(fpscr, srcReg1);
3398 FpscrExc = fpscr;
3399 '''
3400 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3401 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3402
3403 vrev16Code = '''
3404 destElem = srcElem1;
3405 unsigned groupSize = ((1 << 1) / sizeof(Element));
3406 unsigned reverseMask = (groupSize - 1);
3407 j = i ^ reverseMask;
3408 '''
3409 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3410 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3411 vrev32Code = '''
3412 destElem = srcElem1;
3413 unsigned groupSize = ((1 << 2) / sizeof(Element));
3414 unsigned reverseMask = (groupSize - 1);
3415 j = i ^ reverseMask;
3416 '''
3417 twoRegMiscInst("vrev32", "NVrev32D",
3418 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3419 twoRegMiscInst("vrev32", "NVrev32Q",
3420 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3421 vrev64Code = '''
3422 destElem = srcElem1;
3423 unsigned groupSize = ((1 << 3) / sizeof(Element));
3424 unsigned reverseMask = (groupSize - 1);
3425 j = i ^ reverseMask;
3426 '''
3427 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3428 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3429
3430 split('exec')
3431 exec_output += vcompares + vcomparesL
3432
3433 vpaddlCode = '''
3434 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3435 '''
3436 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3437 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3438
3439 vpadalCode = '''
3440 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3441 '''
3442 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3443 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3444
3445 vclsCode = '''
3446 unsigned count = 0;
3447 if (srcElem1 < 0) {
3448 srcElem1 <<= 1;
3449 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3450 count++;
3451 srcElem1 <<= 1;
3452 }
3453 } else {
3454 srcElem1 <<= 1;
3455 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3456 count++;
3457 srcElem1 <<= 1;
3458 }
3459 }
3460 destElem = count;
3461 '''
3462 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3463 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3464
3465 vclzCode = '''
3466 unsigned count = 0;
3467 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3468 count++;
3469 srcElem1 <<= 1;
3470 }
3471 destElem = count;
3472 '''
3473 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3474 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3475
3476 vcntCode = '''
3477 unsigned count = 0;
3478 while (srcElem1 && count < sizeof(Element) * 8) {
3479 count += srcElem1 & 0x1;
3480 srcElem1 >>= 1;
3481 }
3482 destElem = count;
3483 '''
3484
3485 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3486 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3487
3488 vmvnCode = '''
3489 destElem = ~srcElem1;
3490 '''
3491 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3492 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3493
3494 vqabsCode = '''
3495 FPSCR fpscr = (FPSCR) FpscrQc;
3496 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3497 fpscr.qc = 1;
3498 destElem = ~srcElem1;
3499 } else if (srcElem1 < 0) {
3500 destElem = -srcElem1;
3501 } else {
3502 destElem = srcElem1;
3503 }
3504 FpscrQc = fpscr;
3505 '''
3506 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3507 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3508
3509 vqnegCode = '''
3510 FPSCR fpscr = (FPSCR) FpscrQc;
3511 if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3512 fpscr.qc = 1;
3513 destElem = ~srcElem1;
3514 } else {
3515 destElem = -srcElem1;
3516 }
3517 FpscrQc = fpscr;
3518 '''
3519 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3520 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3521
3522 vabsCode = '''
3523 if (srcElem1 < 0) {
3524 destElem = -srcElem1;
3525 } else {
3526 destElem = srcElem1;
3527 }
3528 '''
3529
3530 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3531 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3532 vabsfpCode = '''
3533 union
3534 {
3535 uint32_t i;
3536 float f;
3537 } cStruct;
3538 cStruct.f = srcReg1;
3539 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3540 destReg = cStruct.f;
3541 '''
3542 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3543 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3544
3545 vnegCode = '''
3546 destElem = -srcElem1;
3547 '''
3548 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3549 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3550 vnegfpCode = '''
3551 destReg = -srcReg1;
3552 '''
3553 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3554 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3555
3556 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3557 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3558 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3559 vcgtfpCode = '''
3560 FPSCR fpscr = (FPSCR) FpscrExc;
3561 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3561 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
3562 true, true, VfpRoundNearest);
3563 destReg = (res == 0) ? -1 : 0;
3564 if (res == 2.0)
3565 fpscr.ioc = 1;
3566 FpscrExc = fpscr;
3567 '''
3568 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3569 2, vcgtfpCode, toInt = True)
3570 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3571 4, vcgtfpCode, toInt = True)
3572
3573 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3574 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3575 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3576 vcgefpCode = '''
3577 FPSCR fpscr = (FPSCR) FpscrExc;
3562 true, true, VfpRoundNearest);
3563 destReg = (res == 0) ? -1 : 0;
3564 if (res == 2.0)
3565 fpscr.ioc = 1;
3566 FpscrExc = fpscr;
3567 '''
3568 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3569 2, vcgtfpCode, toInt = True)
3570 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3571 4, vcgtfpCode, toInt = True)
3572
3573 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3574 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3575 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3576 vcgefpCode = '''
3577 FPSCR fpscr = (FPSCR) FpscrExc;
3578 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3578 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
3579 true, true, VfpRoundNearest);
3580 destReg = (res == 0) ? -1 : 0;
3581 if (res == 2.0)
3582 fpscr.ioc = 1;
3583 FpscrExc = fpscr;
3584 '''
3585 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3586 2, vcgefpCode, toInt = True)
3587 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3588 4, vcgefpCode, toInt = True)
3589
3590 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3591 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3592 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3593 vceqfpCode = '''
3594 FPSCR fpscr = (FPSCR) FpscrExc;
3579 true, true, VfpRoundNearest);
3580 destReg = (res == 0) ? -1 : 0;
3581 if (res == 2.0)
3582 fpscr.ioc = 1;
3583 FpscrExc = fpscr;
3584 '''
3585 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3586 2, vcgefpCode, toInt = True)
3587 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3588 4, vcgefpCode, toInt = True)
3589
3590 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3591 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3592 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3593 vceqfpCode = '''
3594 FPSCR fpscr = (FPSCR) FpscrExc;
3595 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3595 float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
3596 true, true, VfpRoundNearest);
3597 destReg = (res == 0) ? -1 : 0;
3598 if (res == 2.0)
3599 fpscr.ioc = 1;
3600 FpscrExc = fpscr;
3601 '''
3602 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3603 2, vceqfpCode, toInt = True)
3604 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3605 4, vceqfpCode, toInt = True)
3606
3607 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3608 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3609 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3610 vclefpCode = '''
3611 FPSCR fpscr = (FPSCR) FpscrExc;
3596 true, true, VfpRoundNearest);
3597 destReg = (res == 0) ? -1 : 0;
3598 if (res == 2.0)
3599 fpscr.ioc = 1;
3600 FpscrExc = fpscr;
3601 '''
3602 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3603 2, vceqfpCode, toInt = True)
3604 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3605 4, vceqfpCode, toInt = True)
3606
3607 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3608 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3609 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3610 vclefpCode = '''
3611 FPSCR fpscr = (FPSCR) FpscrExc;
3612 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3612 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
3613 true, true, VfpRoundNearest);
3614 destReg = (res == 0) ? -1 : 0;
3615 if (res == 2.0)
3616 fpscr.ioc = 1;
3617 FpscrExc = fpscr;
3618 '''
3619 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3620 2, vclefpCode, toInt = True)
3621 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3622 4, vclefpCode, toInt = True)
3623
3624 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3625 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3626 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3627 vcltfpCode = '''
3628 FPSCR fpscr = (FPSCR) FpscrExc;
3613 true, true, VfpRoundNearest);
3614 destReg = (res == 0) ? -1 : 0;
3615 if (res == 2.0)
3616 fpscr.ioc = 1;
3617 FpscrExc = fpscr;
3618 '''
3619 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3620 2, vclefpCode, toInt = True)
3621 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3622 4, vclefpCode, toInt = True)
3623
3624 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3625 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3626 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3627 vcltfpCode = '''
3628 FPSCR fpscr = (FPSCR) FpscrExc;
3629 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3629 float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
3630 true, true, VfpRoundNearest);
3631 destReg = (res == 0) ? -1 : 0;
3632 if (res == 2.0)
3633 fpscr.ioc = 1;
3634 FpscrExc = fpscr;
3635 '''
3636 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3637 2, vcltfpCode, toInt = True)
3638 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3639 4, vcltfpCode, toInt = True)
3640
3641 vswpCode = '''
3630 true, true, VfpRoundNearest);
3631 destReg = (res == 0) ? -1 : 0;
3632 if (res == 2.0)
3633 fpscr.ioc = 1;
3634 FpscrExc = fpscr;
3635 '''
3636 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3637 2, vcltfpCode, toInt = True)
3638 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3639 4, vcltfpCode, toInt = True)
3640
3641 vswpCode = '''
3642 FloatRegBits mid;
3642 uint32_t mid;
3643 for (unsigned r = 0; r < rCount; r++) {
3644 mid = srcReg1.regs[r];
3645 srcReg1.regs[r] = destReg.regs[r];
3646 destReg.regs[r] = mid;
3647 }
3648 '''
3649 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3650 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3651
3652 vtrnCode = '''
3653 Element mid;
3654 for (unsigned i = 0; i < eCount; i += 2) {
3655 mid = srcReg1.elements[i];
3656 srcReg1.elements[i] = destReg.elements[i + 1];
3657 destReg.elements[i + 1] = mid;
3658 }
3659 '''
3660 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3661 smallUnsignedTypes, 2, vtrnCode)
3662 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3663 smallUnsignedTypes, 4, vtrnCode)
3664
3665 vuzpCode = '''
3666 Element mid[eCount];
3667 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3668 for (unsigned i = 0; i < eCount / 2; i++) {
3669 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3670 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3671 destReg.elements[i] = destReg.elements[2 * i];
3672 }
3673 for (unsigned i = 0; i < eCount / 2; i++) {
3674 destReg.elements[eCount / 2 + i] = mid[2 * i];
3675 }
3676 '''
3677 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3678 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3679
3680 vzipCode = '''
3681 Element mid[eCount];
3682 memcpy(&mid, &destReg, sizeof(destReg));
3683 for (unsigned i = 0; i < eCount / 2; i++) {
3684 destReg.elements[2 * i] = mid[i];
3685 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3686 }
3687 for (int i = 0; i < eCount / 2; i++) {
3688 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3689 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3690 }
3691 '''
3692 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3693 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3694
3695 vmovnCode = 'destElem = srcElem1;'
3696 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3697
3698 vdupCode = 'destElem = srcElem1;'
3699 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3700 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3701
3702 def vdupGprInst(name, Name, opClass, types, rCount):
3703 global header_output, exec_output
3704 eWalkCode = simdEnabledCheckCode + '''
3705 RegVect destReg;
3706 for (unsigned i = 0; i < eCount; i++) {
3707 destReg.elements[i] = htog((Element)Op1);
3708 }
3709 '''
3710 for reg in range(rCount):
3711 eWalkCode += '''
3712 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3713 ''' % { "reg" : reg }
3714 iop = InstObjParams(name, Name,
3715 "RegRegOp",
3716 { "code": eWalkCode,
3717 "r_count": rCount,
3718 "predicate_test": predicateTest,
3719 "op_class": opClass }, [])
3720 header_output += NeonRegRegOpDeclare.subst(iop)
3721 exec_output += NeonEqualRegExecute.subst(iop)
3722 for type in types:
3723 substDict = { "targs" : type,
3724 "class_name" : Name }
3725 exec_output += NeonExecDeclare.subst(substDict)
3726 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3727 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3728
3729 vmovCode = 'destElem = imm;'
3730 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3731 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3732
3733 vorrCode = 'destElem |= imm;'
3734 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3735 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3736
3737 vmvnCode = 'destElem = ~imm;'
3738 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3739 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3740
3741 vbicCode = 'destElem &= ~imm;'
3742 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3743 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3744
3745 vqmovnCode = '''
3746 FPSCR fpscr = (FPSCR) FpscrQc;
3747 destElem = srcElem1;
3748 if ((BigElement)destElem != srcElem1) {
3749 fpscr.qc = 1;
3750 destElem = mask(sizeof(Element) * 8 - 1);
3751 if (srcElem1 < 0)
3752 destElem = ~destElem;
3753 }
3754 FpscrQc = fpscr;
3755 '''
3756 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3757
3758 vqmovunCode = '''
3759 FPSCR fpscr = (FPSCR) FpscrQc;
3760 destElem = srcElem1;
3761 if ((BigElement)destElem != srcElem1) {
3762 fpscr.qc = 1;
3763 destElem = mask(sizeof(Element) * 8);
3764 }
3765 FpscrQc = fpscr;
3766 '''
3767 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3768 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3769
3770 vqmovunsCode = '''
3771 FPSCR fpscr = (FPSCR) FpscrQc;
3772 destElem = srcElem1;
3773 if (srcElem1 < 0 ||
3774 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3775 fpscr.qc = 1;
3776 destElem = mask(sizeof(Element) * 8);
3777 if (srcElem1 < 0)
3778 destElem = ~destElem;
3779 }
3780 FpscrQc = fpscr;
3781 '''
3782 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3783 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3784
3785 def buildVext(name, Name, opClass, types, rCount, op):
3786 global header_output, exec_output
3787 eWalkCode = simdEnabledCheckCode + '''
3788 RegVect srcReg1, srcReg2, destReg;
3789 '''
3790 for reg in range(rCount):
3791 eWalkCode += '''
3792 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3793 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3794 ''' % { "reg" : reg }
3795 eWalkCode += op
3796 for reg in range(rCount):
3797 eWalkCode += '''
3798 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3799 ''' % { "reg" : reg }
3800 iop = InstObjParams(name, Name,
3801 "RegRegRegImmOp",
3802 { "code": eWalkCode,
3803 "r_count": rCount,
3804 "predicate_test": predicateTest,
3805 "op_class": opClass }, [])
3806 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3807 exec_output += NeonEqualRegExecute.subst(iop)
3808 for type in types:
3809 substDict = { "targs" : type,
3810 "class_name" : Name }
3811 exec_output += NeonExecDeclare.subst(substDict)
3812
3813 vextCode = '''
3814 for (unsigned i = 0; i < eCount; i++) {
3815 unsigned index = i + imm;
3816 if (index < eCount) {
3817 destReg.elements[i] = srcReg1.elements[index];
3818 } else {
3819 index -= eCount;
3820 if (index >= eCount) {
3821 fault = std::make_shared<UndefinedInstruction>(machInst,
3822 false,
3823 mnemonic);
3824 } else {
3825 destReg.elements[i] = srcReg2.elements[index];
3826 }
3827 }
3828 }
3829 '''
3830 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3831 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3832
3833 def buildVtbxl(name, Name, opClass, length, isVtbl):
3834 global header_output, decoder_output, exec_output
3835 code = simdEnabledCheckCode + '''
3836 union
3837 {
3838 uint8_t bytes[32];
3643 for (unsigned r = 0; r < rCount; r++) {
3644 mid = srcReg1.regs[r];
3645 srcReg1.regs[r] = destReg.regs[r];
3646 destReg.regs[r] = mid;
3647 }
3648 '''
3649 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3650 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3651
3652 vtrnCode = '''
3653 Element mid;
3654 for (unsigned i = 0; i < eCount; i += 2) {
3655 mid = srcReg1.elements[i];
3656 srcReg1.elements[i] = destReg.elements[i + 1];
3657 destReg.elements[i + 1] = mid;
3658 }
3659 '''
3660 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3661 smallUnsignedTypes, 2, vtrnCode)
3662 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3663 smallUnsignedTypes, 4, vtrnCode)
3664
3665 vuzpCode = '''
3666 Element mid[eCount];
3667 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3668 for (unsigned i = 0; i < eCount / 2; i++) {
3669 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3670 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3671 destReg.elements[i] = destReg.elements[2 * i];
3672 }
3673 for (unsigned i = 0; i < eCount / 2; i++) {
3674 destReg.elements[eCount / 2 + i] = mid[2 * i];
3675 }
3676 '''
3677 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3678 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3679
3680 vzipCode = '''
3681 Element mid[eCount];
3682 memcpy(&mid, &destReg, sizeof(destReg));
3683 for (unsigned i = 0; i < eCount / 2; i++) {
3684 destReg.elements[2 * i] = mid[i];
3685 destReg.elements[2 * i + 1] = srcReg1.elements[i];
3686 }
3687 for (int i = 0; i < eCount / 2; i++) {
3688 srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3689 srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3690 }
3691 '''
3692 twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3693 twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3694
3695 vmovnCode = 'destElem = srcElem1;'
3696 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3697
3698 vdupCode = 'destElem = srcElem1;'
3699 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3700 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3701
3702 def vdupGprInst(name, Name, opClass, types, rCount):
3703 global header_output, exec_output
3704 eWalkCode = simdEnabledCheckCode + '''
3705 RegVect destReg;
3706 for (unsigned i = 0; i < eCount; i++) {
3707 destReg.elements[i] = htog((Element)Op1);
3708 }
3709 '''
3710 for reg in range(rCount):
3711 eWalkCode += '''
3712 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3713 ''' % { "reg" : reg }
3714 iop = InstObjParams(name, Name,
3715 "RegRegOp",
3716 { "code": eWalkCode,
3717 "r_count": rCount,
3718 "predicate_test": predicateTest,
3719 "op_class": opClass }, [])
3720 header_output += NeonRegRegOpDeclare.subst(iop)
3721 exec_output += NeonEqualRegExecute.subst(iop)
3722 for type in types:
3723 substDict = { "targs" : type,
3724 "class_name" : Name }
3725 exec_output += NeonExecDeclare.subst(substDict)
3726 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3727 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3728
3729 vmovCode = 'destElem = imm;'
3730 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3731 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3732
3733 vorrCode = 'destElem |= imm;'
3734 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3735 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3736
3737 vmvnCode = 'destElem = ~imm;'
3738 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3739 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3740
3741 vbicCode = 'destElem &= ~imm;'
3742 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3743 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3744
3745 vqmovnCode = '''
3746 FPSCR fpscr = (FPSCR) FpscrQc;
3747 destElem = srcElem1;
3748 if ((BigElement)destElem != srcElem1) {
3749 fpscr.qc = 1;
3750 destElem = mask(sizeof(Element) * 8 - 1);
3751 if (srcElem1 < 0)
3752 destElem = ~destElem;
3753 }
3754 FpscrQc = fpscr;
3755 '''
3756 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3757
3758 vqmovunCode = '''
3759 FPSCR fpscr = (FPSCR) FpscrQc;
3760 destElem = srcElem1;
3761 if ((BigElement)destElem != srcElem1) {
3762 fpscr.qc = 1;
3763 destElem = mask(sizeof(Element) * 8);
3764 }
3765 FpscrQc = fpscr;
3766 '''
3767 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3768 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3769
3770 vqmovunsCode = '''
3771 FPSCR fpscr = (FPSCR) FpscrQc;
3772 destElem = srcElem1;
3773 if (srcElem1 < 0 ||
3774 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3775 fpscr.qc = 1;
3776 destElem = mask(sizeof(Element) * 8);
3777 if (srcElem1 < 0)
3778 destElem = ~destElem;
3779 }
3780 FpscrQc = fpscr;
3781 '''
3782 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3783 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3784
3785 def buildVext(name, Name, opClass, types, rCount, op):
3786 global header_output, exec_output
3787 eWalkCode = simdEnabledCheckCode + '''
3788 RegVect srcReg1, srcReg2, destReg;
3789 '''
3790 for reg in range(rCount):
3791 eWalkCode += '''
3792 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3793 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3794 ''' % { "reg" : reg }
3795 eWalkCode += op
3796 for reg in range(rCount):
3797 eWalkCode += '''
3798 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3799 ''' % { "reg" : reg }
3800 iop = InstObjParams(name, Name,
3801 "RegRegRegImmOp",
3802 { "code": eWalkCode,
3803 "r_count": rCount,
3804 "predicate_test": predicateTest,
3805 "op_class": opClass }, [])
3806 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3807 exec_output += NeonEqualRegExecute.subst(iop)
3808 for type in types:
3809 substDict = { "targs" : type,
3810 "class_name" : Name }
3811 exec_output += NeonExecDeclare.subst(substDict)
3812
3813 vextCode = '''
3814 for (unsigned i = 0; i < eCount; i++) {
3815 unsigned index = i + imm;
3816 if (index < eCount) {
3817 destReg.elements[i] = srcReg1.elements[index];
3818 } else {
3819 index -= eCount;
3820 if (index >= eCount) {
3821 fault = std::make_shared<UndefinedInstruction>(machInst,
3822 false,
3823 mnemonic);
3824 } else {
3825 destReg.elements[i] = srcReg2.elements[index];
3826 }
3827 }
3828 }
3829 '''
3830 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3831 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3832
3833 def buildVtbxl(name, Name, opClass, length, isVtbl):
3834 global header_output, decoder_output, exec_output
3835 code = simdEnabledCheckCode + '''
3836 union
3837 {
3838 uint8_t bytes[32];
3839 FloatRegBits regs[8];
3839 uint32_t regs[8];
3840 } table;
3841
3842 union
3843 {
3844 uint8_t bytes[8];
3840 } table;
3841
3842 union
3843 {
3844 uint8_t bytes[8];
3845 FloatRegBits regs[2];
3845 uint32_t regs[2];
3846 } destReg, srcReg2;
3847
3848 const unsigned length = %(length)d;
3849 const bool isVtbl = %(isVtbl)s;
3850
3851 srcReg2.regs[0] = htog(FpOp2P0_uw);
3852 srcReg2.regs[1] = htog(FpOp2P1_uw);
3853
3854 destReg.regs[0] = htog(FpDestP0_uw);
3855 destReg.regs[1] = htog(FpDestP1_uw);
3856 ''' % { "length" : length, "isVtbl" : isVtbl }
3857 for reg in range(8):
3858 if reg < length * 2:
3859 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3860 { "reg" : reg }
3861 else:
3862 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3863 code += '''
3864 for (unsigned i = 0; i < sizeof(destReg); i++) {
3865 uint8_t index = srcReg2.bytes[i];
3866 if (index < 8 * length) {
3867 destReg.bytes[i] = table.bytes[index];
3868 } else {
3869 if (isVtbl)
3870 destReg.bytes[i] = 0;
3871 // else destReg.bytes[i] unchanged
3872 }
3873 }
3874
3875 FpDestP0_uw = gtoh(destReg.regs[0]);
3876 FpDestP1_uw = gtoh(destReg.regs[1]);
3877 '''
3878 iop = InstObjParams(name, Name,
3879 "RegRegRegOp",
3880 { "code": code,
3881 "predicate_test": predicateTest,
3882 "op_class": opClass }, [])
3883 header_output += RegRegRegOpDeclare.subst(iop)
3884 decoder_output += RegRegRegOpConstructor.subst(iop)
3885 exec_output += PredOpExecute.subst(iop)
3886
3887 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3888 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3889 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3890 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3891
3892 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3893 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3894 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3895 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3896}};
3846 } destReg, srcReg2;
3847
3848 const unsigned length = %(length)d;
3849 const bool isVtbl = %(isVtbl)s;
3850
3851 srcReg2.regs[0] = htog(FpOp2P0_uw);
3852 srcReg2.regs[1] = htog(FpOp2P1_uw);
3853
3854 destReg.regs[0] = htog(FpDestP0_uw);
3855 destReg.regs[1] = htog(FpDestP1_uw);
3856 ''' % { "length" : length, "isVtbl" : isVtbl }
3857 for reg in range(8):
3858 if reg < length * 2:
3859 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3860 { "reg" : reg }
3861 else:
3862 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3863 code += '''
3864 for (unsigned i = 0; i < sizeof(destReg); i++) {
3865 uint8_t index = srcReg2.bytes[i];
3866 if (index < 8 * length) {
3867 destReg.bytes[i] = table.bytes[index];
3868 } else {
3869 if (isVtbl)
3870 destReg.bytes[i] = 0;
3871 // else destReg.bytes[i] unchanged
3872 }
3873 }
3874
3875 FpDestP0_uw = gtoh(destReg.regs[0]);
3876 FpDestP1_uw = gtoh(destReg.regs[1]);
3877 '''
3878 iop = InstObjParams(name, Name,
3879 "RegRegRegOp",
3880 { "code": code,
3881 "predicate_test": predicateTest,
3882 "op_class": opClass }, [])
3883 header_output += RegRegRegOpDeclare.subst(iop)
3884 decoder_output += RegRegRegOpConstructor.subst(iop)
3885 exec_output += PredOpExecute.subst(iop)
3886
3887 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3888 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3889 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3890 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3891
3892 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3893 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3894 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3895 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3896}};