neon.isa (10037:5cac77888310) neon.isa (10197:a60405212dea)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061output exec {{
1061let {{
1062 header_output = ""
1063 exec_output = ""
1064
1065 vcompares = '''
1062 static float
1063 vcgtFunc(float op1, float op2)
1064 {
1065 if (std::isnan(op1) || std::isnan(op2))
1066 return 2.0;
1067 return (op1 > op2) ? 0.0 : 1.0;
1068 }
1069
1070 static float
1071 vcgeFunc(float op1, float op2)
1072 {
1073 if (std::isnan(op1) || std::isnan(op2))
1074 return 2.0;
1075 return (op1 >= op2) ? 0.0 : 1.0;
1076 }
1077
1078 static float
1079 vceqFunc(float op1, float op2)
1080 {
1081 if (isSnan(op1) || isSnan(op2))
1082 return 2.0;
1083 return (op1 == op2) ? 0.0 : 1.0;
1084 }
1066 static float
1067 vcgtFunc(float op1, float op2)
1068 {
1069 if (std::isnan(op1) || std::isnan(op2))
1070 return 2.0;
1071 return (op1 > op2) ? 0.0 : 1.0;
1072 }
1073
1074 static float
1075 vcgeFunc(float op1, float op2)
1076 {
1077 if (std::isnan(op1) || std::isnan(op2))
1078 return 2.0;
1079 return (op1 >= op2) ? 0.0 : 1.0;
1080 }
1081
1082 static float
1083 vceqFunc(float op1, float op2)
1084 {
1085 if (isSnan(op1) || isSnan(op2))
1086 return 2.0;
1087 return (op1 == op2) ? 0.0 : 1.0;
1088 }
1085
1089'''
1090 vcomparesL = '''
1086 static float
1087 vcleFunc(float op1, float op2)
1088 {
1089 if (std::isnan(op1) || std::isnan(op2))
1090 return 2.0;
1091 return (op1 <= op2) ? 0.0 : 1.0;
1092 }
1093
1094 static float
1095 vcltFunc(float op1, float op2)
1096 {
1097 if (std::isnan(op1) || std::isnan(op2))
1098 return 2.0;
1099 return (op1 < op2) ? 0.0 : 1.0;
1100 }
1091 static float
1092 vcleFunc(float op1, float op2)
1093 {
1094 if (std::isnan(op1) || std::isnan(op2))
1095 return 2.0;
1096 return (op1 <= op2) ? 0.0 : 1.0;
1097 }
1098
1099 static float
1100 vcltFunc(float op1, float op2)
1101 {
1102 if (std::isnan(op1) || std::isnan(op2))
1103 return 2.0;
1104 return (op1 < op2) ? 0.0 : 1.0;
1105 }
1101
1106'''
1107 vacomparesG = '''
1102 static float
1103 vacgtFunc(float op1, float op2)
1104 {
1105 if (std::isnan(op1) || std::isnan(op2))
1106 return 2.0;
1107 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1108 }
1109
1110 static float
1111 vacgeFunc(float op1, float op2)
1112 {
1113 if (std::isnan(op1) || std::isnan(op2))
1114 return 2.0;
1115 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1116 }
1108 static float
1109 vacgtFunc(float op1, float op2)
1110 {
1111 if (std::isnan(op1) || std::isnan(op2))
1112 return 2.0;
1113 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1114 }
1115
1116 static float
1117 vacgeFunc(float op1, float op2)
1118 {
1119 if (std::isnan(op1) || std::isnan(op2))
1120 return 2.0;
1121 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1122 }
1117}};
1123'''
1118
1124
1119let {{
1125 exec_output += vcompares + vacomparesG
1120
1126
1121 header_output = ""
1122 exec_output = ""
1123
# Tuples of C++ integer type names, grouped by signedness. The "small"
# groups stop at 32 bits; the full groups append the 64-bit type.
# smallTypes and allTypes concatenate the unsigned and signed groups.
1124 smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1125 unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1126 smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1127 signedTypes = smallSignedTypes + ("int64_t",)
1128 smallTypes = smallUnsignedTypes + smallSignedTypes
1129 allTypes = unsignedTypes + signedTypes
1130
# Generate the declaration and execute code for a three-operand
# instruction (two sources, one destination) and append it to the global
# header_output / exec_output strings.
#
#   name, Name - passed to InstObjParams as the first two arguments
#   opClass    - stored under "op_class" in the InstObjParams dict
#   types      - iterable of type strings; one NeonExecDeclare substitution
#                is emitted per entry (as "targs")
#   rCount     - number of registers copied in and out of each RegVect
#   op         - C++ snippet spliced into the per-element loop; the
#                generated code reads destElem after it, so it is expected
#                to assign destElem
#   readDest   - when true, the destination is also loaded before the loop
#                and destElem is pre-set from it for each element
#   pairwise   - when true, each result element is computed from a pair of
#                adjacent source elements (see the loop template below)
1131 def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1132 readDest=False, pairwise=False):
1133 global header_output, exec_output
1134 eWalkCode = simdEnabledCheckCode + '''
1135 RegVect srcReg1, srcReg2, destReg;
1136 '''
# Load the source operands (and the destination when readDest is set)
# into the RegVect temporaries, one register index at a time.
1137 for reg in range(rCount):
1138 eWalkCode += '''
1139 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1140 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1141 ''' % { "reg" : reg }
1142 if readDest:
1143 eWalkCode += '''
1144 destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1145 ''' % { "reg" : reg }
# Optional statement that seeds destElem from the current destination
# element; left empty when the destination is write-only.
1146 readDestCode = ''
1147 if readDest:
1148 readDestCode = 'destElem = gtoh(destReg.elements[i]);'
# Pairwise form: result i uses elements 2*i and 2*i+1, taken from srcReg1
# while 2*i < eCount and from srcReg2 (offset by eCount) afterwards.
# Otherwise result i uses element i of both sources.
1149 if pairwise:
1150 eWalkCode += '''
1151 for (unsigned i = 0; i < eCount; i++) {
1152 Element srcElem1 = gtoh(2 * i < eCount ?
1153 srcReg1.elements[2 * i] :
1154 srcReg2.elements[2 * i - eCount]);
1155 Element srcElem2 = gtoh(2 * i < eCount ?
1156 srcReg1.elements[2 * i + 1] :
1157 srcReg2.elements[2 * i + 1 - eCount]);
1158 Element destElem;
1159 %(readDest)s
1160 %(op)s
1161 destReg.elements[i] = htog(destElem);
1162 }
1163 ''' % { "op" : op, "readDest" : readDestCode }
1164 else:
1165 eWalkCode += '''
1166 for (unsigned i = 0; i < eCount; i++) {
1167 Element srcElem1 = gtoh(srcReg1.elements[i]);
1168 Element srcElem2 = gtoh(srcReg2.elements[i]);
1169 Element destElem;
1170 %(readDest)s
1171 %(op)s
1172 destReg.elements[i] = htog(destElem);
1173 }
1174 ''' % { "op" : op, "readDest" : readDestCode }
# Copy the computed destination registers back to the FpDestP operands.
1175 for reg in range(rCount):
1176 eWalkCode += '''
1177 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1178 ''' % { "reg" : reg }
# Wrap the generated code in an InstObjParams and emit the class
# declaration plus the execute template.
1179 iop = InstObjParams(name, Name,
1180 "RegRegRegOp",
1181 { "code": eWalkCode,
1182 "r_count": rCount,
1183 "predicate_test": predicateTest,
1184 "op_class": opClass }, [])
1185 header_output += NeonRegRegRegOpDeclare.subst(iop)
1186 exec_output += NeonEqualRegExecute.subst(iop)
# Emit one NeonExecDeclare substitution per requested type.
1187 for type in types:
1188 substDict = { "targs" : type,
1189 "class_name" : Name }
1190 exec_output += NeonExecDeclare.subst(substDict)
1191
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a three-operand NEON floating-point instruction.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in each source/destination vector.
    op         -- C++ fragment computing destReg from srcReg1/srcReg2.
    readDest   -- when set, the destination is loaded and destReg is
                  initialised from it before op runs.
    pairwise   -- when set, result r is built from registers 2r and 2r+1
                  of source 1 followed by source 2.
    toInt      -- when set, the destination is a RegVect of FloatRegBits
                  instead of a FloatVect of FloatReg.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
    ''' % { "reg" : reg }
            else:
                eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            # destRegs is a RegVect here, so the read-back has to go
            # through .regs just like the write-back does.
            readDestCode = 'destReg = destRegs.regs[r];'
    if pairwise:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    else:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
    FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
    ''' % { "reg" : reg }
        else:
            eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1276
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate a three-operand NEON instruction with mixed operand widths.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1/srcElem2.
    bigSrc1, bigSrc2, bigDest
               -- select, per operand, the "Big" vector/element types and
                  four registers instead of two.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Each source element is declared with the width of its own source
    # vector; srcElem2 follows src2Prefix, not src1Prefix.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1340
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Narrowing form: both sources use the big types, the destination
    does not. Delegates to threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1344
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Long form: only the destination uses the big types. Delegates to
    threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1348
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Wide form: the first source and the destination use the big types,
    the second source does not. Delegates to threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1352
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON "vector by scalar" instruction with equal widths.

    The second operand of every lane is element imm of the second source
    vector.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers copied into each vector.
    op         -- C++ fragment computing destElem from srcElem1/srcElem2.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    The generated code raises UndefinedInstruction when imm is not a valid
    element index. Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The index is out of range when it is negative OR too large; joining
    # the two tests with && could never be true.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1400
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Generate a NEON "vector by scalar" instruction with a long result.

    Both sources span two registers; the destination is a BigRegVect
    spanning four. The second operand of every lane is element imm of the
    second source vector.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1/srcElem2.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    The generated code raises UndefinedInstruction when imm is not a valid
    element index. Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The index is out of range when it is negative OR too large; joining
    # the two tests with && could never be true.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1451
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a floating-point NEON "vector by scalar" instruction.

    The second operand of every lane is register imm of the second source
    vector.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in each vector.
    op         -- C++ fragment computing destReg from srcReg1/srcReg2.
    readDest   -- when set, the destination registers are loaded and
                  destReg is initialised from them before op runs.

    The generated code raises UndefinedInstruction when imm is not a valid
    index. Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The index is out of range when it is negative OR too large; joining
    # the two tests with && could never be true.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1500
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a two-register NEON instruction that takes an immediate.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in the source and destination.
    op         -- C++ fragment computing the result from the source.
    readDest   -- when set, the destination registers are loaded and the
                  result variable is initialised from them before op runs.
    toInt      -- when set, the result variable is "FloatRegBits destReg"
                  stored through destRegs.regs instead of "Element
                  destElem" stored through destRegs.elements.
    fromInt    -- when set, the source variable is "FloatRegBits srcReg1"
                  read from srcRegs1.regs instead of "Element srcElem1".

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    # The read-back is emitted only when readDest is set, because destRegs
    # is loaded from the destination registers only in that case.
    readDestCode = ''
    if readDest:
        if toInt:
            readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        else:
            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
    readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    declDest = 'Element destElem;'
    writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if toInt:
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1557
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing two-register NEON instruction with an immediate.

    The source is a BigRegVect spanning four registers and the destination
    a RegVect spanning two.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(4):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})

    loadDestElem = ''
    if readDest:
        for idx in range(2):
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(2):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": ''.join(pieces),
                            "r_count": 2,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1601
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a lengthening two-register NEON instruction with an immediate.

    The source is a RegVect spanning two registers and the destination a
    BigRegVect spanning four.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-lane variable is destElem; destReg is the whole vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1645
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a two-register NEON integer instruction.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in the source and destination.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    The result is stored at index j, which starts out equal to the lane
    index i (presumably so that op can redirect the store -- confirm
    against the callers). Appends to the globals header_output and
    exec_output.
    """
    global header_output, exec_output

    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDestElem = ''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})
        if readDest:
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(rCount):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": ''.join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1688
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction with a scalar source.

    Every lane reads the same source element, srcReg1.elements[imm].

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in the source and destination.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output

    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDestElem = ''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})
        if readDest:
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(rCount):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": ''.join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1730
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction that rewrites both operands.

    Both srcReg1 and destReg are loaded before op and both are written
    back afterwards. op is inserted verbatim, without a per-lane loop
    around it.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in each operand.
    op         -- complete C++ fragment operating on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility only; the
                  destination is always loaded, so the flag does not
                  change the generated code.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1765
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Generate a two-register NEON floating-point instruction.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in the source and destination.
    op         -- C++ fragment computing destReg from srcReg1.
    readDest   -- when set, the destination is loaded and destReg is
                  initialised from it before op runs.
    toInt      -- when set, the destination is a RegVect of FloatRegBits
                  instead of a FloatVect of FloatReg.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
    ''' % { "reg" : reg }
            else:
                eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        # The generated loop counts with r, so the read-back must index
        # with r as well.
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            # destRegs is a RegVect here; go through .regs like the
            # write-back does.
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
    FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
    ''' % { "reg" : reg }
        else:
            eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1831
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON instruction that folds adjacent source elements.

    Lane i of the BigRegVect destination is computed from source elements
    2i and 2i+1, for i below eCount / 2.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of registers in the source and destination.
    op         -- C++ fragment computing destElem from srcElem1/srcElem2.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output

    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDestElem = ''

    pieces = [simdEnabledCheckCode, '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
    srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})
        if readDest:
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
    pieces.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(rCount):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": ''.join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1875
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing two-register NEON instruction.

    The source is a BigRegVect spanning four registers and the destination
    a RegVect spanning two.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(4):
        pieces.append('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % {"reg": idx})

    loadDestElem = ''
    if readDest:
        for idx in range(2):
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(2):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": ''.join(pieces),
                            "r_count": 2,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1919
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON instruction with one register operand and an immediate.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    rCount     -- number of destination registers.
    op         -- C++ fragment computing destElem.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output

    pieces = [simdEnabledCheckCode, '''
    RegVect destReg;
    ''']

    loadDestElem = ''
    if readDest:
        for idx in range(rCount):
            pieces.append('''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % {"reg": idx})
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem})
    for idx in range(rCount):
        pieces.append('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % {"reg": idx})

    params = InstObjParams(name, Name,
                           "RegImmOp",
                           {"code": ''.join(pieces),
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1957
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a lengthening two-register NEON instruction.

    The source is a RegVect spanning two registers and the destination a
    BigRegVect spanning four.

    name, Name -- mnemonic and class name passed to InstObjParams.
    opClass    -- value substituted as "op_class".
    types      -- element types; one NeonExecDeclare is emitted per entry.
    op         -- C++ fragment computing destElem from srcElem1.
    readDest   -- when set, the destination registers are loaded and
                  destElem is initialised from them before op runs.

    Appends to the globals header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-lane variable is destElem; destReg is the whole vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2001
# Halving add, rounding halving add and halving subtract. Each one halves
# the operands with their low bit masked off and then applies the carry or
# borrow of the two low bits separately. The D form uses 2 registers and
# the Q form 4.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + _suffix, "SimdAddOp", allTypes,
                      _regs, vhaddCode)

vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + _suffix, "SimdAddOp", allTypes,
                      _regs, vrhaddCode)

vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + _suffix, "SimdAddOp", allTypes,
                      _regs, vhsubCode)
2039
# Bitwise instructions. All of them are generated for the unsigned element
# types only, in a D form (2 registers) and a Q form (4 registers).
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vorrCode)

# vmov is generated from the same OR operation as vorr, under its own
# mnemonic, class names and op class.
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + _suffix, "SimdMiscOp", unsignedTypes,
                      _regs, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, veorCode)

# The three bit-select instructions read the old destination value, so
# they are generated with readDest set.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vbifCode, readDest=True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vbitCode, readDest=True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for _suffix, _regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + _suffix, "SimdAluOp", unsignedTypes,
                      _regs, vbslCode, readDest=True)
2088
# VMAX (integer): each destination element is the larger of the two
# source elements (srcElem2 is chosen when they compare equal).
2089 vmaxCode = '''
2090 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2091 '''
2092 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2093 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2094
# VMIN (integer): each destination element is the smaller of the two
# source elements (srcElem2 is chosen when they compare equal).
2095 vminCode = '''
2096 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2097 '''
2098 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2099 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2100
# VADD (integer): element-wise sum of the two sources.
2101 vaddCode = '''
2102 destElem = srcElem1 + srcElem2;
2103 '''
2104 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2105 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2106
# VPADD reuses vaddCode with pairwise=True; only a D-named class is
# generated here, over smallUnsignedTypes.
# NOTE(review): pairwise presumably makes the generator feed adjacent
# element pairs to the snippet -- confirm against threeEqualRegInst.
2107 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2108 2, vaddCode, pairwise=True)
# Shared by VADDL (long) and VADDW (wide): both operands are cast to
# BigElement before the addition, so the sum is formed at the wider type.
2109 vaddlwCode = '''
2110 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2111 '''
2112 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2113 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# VADDHN: forms the sum in BigElement, then shifts it right by the bit
# width of Element so the bits above the low Element-width part are kept.
2114 vaddhnCode = '''
2115 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2116 (sizeof(Element) * 8);
2117 '''
2118 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# VRADDHN: like vaddhnCode, but adds 1 << (Element bit width - 1) to the
# wide sum before the right shift, which rounds the narrowed result.
2119 vraddhnCode = '''
2120 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2121 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2122 (sizeof(Element) * 8);
2123 '''
2124 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2125
# VSUB (integer): element-wise difference srcElem1 - srcElem2.
2126 vsubCode = '''
2127 destElem = srcElem1 - srcElem2;
2128 '''
2129 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2130 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Shared by VSUBL (long) and VSUBW (wide): both operands are cast to
# BigElement before the subtraction.
2131 vsublwCode = '''
2132 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2133 '''
2134 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2135 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2136
# VQADD (unsigned): adds the elements; if the sum is smaller than either
# operand (i.e. it wrapped), the result is replaced by (Element)(-1) and
# fpscr.qc is set. FpscrQc is read into a local FPSCR at the start and
# written back at the end.
2137 vqaddUCode = '''
2138 destElem = srcElem1 + srcElem2;
2139 FPSCR fpscr = (FPSCR) FpscrQc;
2140 if (destElem < srcElem1 || destElem < srcElem2) {
2141 destElem = (Element)(-1);
2142 fpscr.qc = 1;
2143 }
2144 FpscrQc = fpscr;
2145 '''
2146 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2147 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# VSUBHN: forms the difference in BigElement, then shifts it right by the
# bit width of Element before storing it to the narrower destination.
2148 vsubhnCode = '''
2149 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2150 (sizeof(Element) * 8);
2151 '''
2152 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# VRSUBHN: like vsubhnCode, but adds 1 << (Element bit width - 1) to the
# wide difference before the right shift, which rounds the result.
2153 vrsubhnCode = '''
2154 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2155 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2156 (sizeof(Element) * 8);
2157 '''
2158 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2159
# VQADD (signed): adds the elements, then detects overflow by sign: both
# sources share a sign and the wrapped result has the opposite one. On
# overflow the result becomes 1 << (bits - 1), minus one when the wrapped
# result was negative, and fpscr.qc is set. FpscrQc is read at the start
# and written back at the end.
2160 vqaddSCode = '''
2161 destElem = srcElem1 + srcElem2;
2162 FPSCR fpscr = (FPSCR) FpscrQc;
2163 bool negDest = (destElem < 0);
2164 bool negSrc1 = (srcElem1 < 0);
2165 bool negSrc2 = (srcElem2 < 0);
2166 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2167 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2168 if (negDest)
2169 destElem -= 1;
2170 fpscr.qc = 1;
2171 }
2172 FpscrQc = fpscr;
2173 '''
2174 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2175 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2176
# VQSUB (unsigned): subtracts the elements; if the difference is larger
# than srcElem1 (i.e. it wrapped below zero), the result is replaced by 0
# and fpscr.qc is set.
2177 vqsubUCode = '''
2178 destElem = srcElem1 - srcElem2;
2179 FPSCR fpscr = (FPSCR) FpscrQc;
2180 if (destElem > srcElem1) {
2181 destElem = 0;
2182 fpscr.qc = 1;
2183 }
2184 FpscrQc = fpscr;
2185 '''
2186 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2187 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2188
# VQSUB (signed): subtracts the elements, then detects overflow by sign:
# the sources have opposite signs (negSrc1 == posSrc2) and the wrapped
# result's sign differs from srcElem1's. On overflow the result becomes
# 1 << (bits - 1), minus one when the wrapped result was negative, and
# fpscr.qc is set.
2189 vqsubSCode = '''
2190 destElem = srcElem1 - srcElem2;
2191 FPSCR fpscr = (FPSCR) FpscrQc;
2192 bool negDest = (destElem < 0);
2193 bool negSrc1 = (srcElem1 < 0);
2194 bool posSrc2 = (srcElem2 >= 0);
2195 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2196 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2197 if (negDest)
2198 destElem -= 1;
2199 fpscr.qc = 1;
2200 }
2201 FpscrQc = fpscr;
2202 '''
2203 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2204 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2205
# VCGT (integer): the destination element is (Element)(-1) when
# srcElem1 > srcElem2, otherwise 0.
2206 vcgtCode = '''
2207 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2208 '''
2209 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2210 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2211
# VCGE (integer): the destination element is (Element)(-1) when
# srcElem1 >= srcElem2, otherwise 0.
2212 vcgeCode = '''
2213 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2214 '''
2215 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2216 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2217
# VCEQ (integer): the destination element is (Element)(-1) when the two
# source elements are equal, otherwise 0.
2218 vceqCode = '''
2219 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2220 '''
2221 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2222 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2223
# VSHL (register): the shift amount is srcElem2 truncated to a signed
# 8-bit value.
#  * Negative amount: shifts right by its magnitude. A magnitude of at
#    least the element width yields 0 and clamps shiftAmt to width - 1.
#    If srcElem1 is negative (ltz) but the result is not, the upper bits
#    are then filled with ones explicitly.
#  * Non-negative amount: shifts left, yielding 0 when the amount is at
#    least the element width.
# Note that the name vshlCode is rebound later in this file to an
# immediate-shift snippet; the calls below use this definition.
2224 vshlCode = '''
2225 int16_t shiftAmt = (int8_t)srcElem2;
2226 if (shiftAmt < 0) {
2227 shiftAmt = -shiftAmt;
2228 if (shiftAmt >= sizeof(Element) * 8) {
2229 shiftAmt = sizeof(Element) * 8 - 1;
2230 destElem = 0;
2231 } else {
2232 destElem = (srcElem1 >> shiftAmt);
2233 }
2234 // Make sure the right shift sign extended when it should.
2235 if (ltz(srcElem1) && !ltz(destElem)) {
2236 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2237 1 - shiftAmt));
2238 }
2239 } else {
2240 if (shiftAmt >= sizeof(Element) * 8) {
2241 destElem = 0;
2242 } else {
2243 destElem = srcElem1 << shiftAmt;
2244 }
2245 }
2246 '''
2247 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2248 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2249
# VRSHL (register, rounding): the shift amount is srcElem2 truncated to a
# signed 8-bit value.
#  * Negative amount: shifts right by its magnitude and adds a rounding
#    bit. rBit is bit (magnitude - 1) of srcElem1 when the magnitude is at
#    most the element width, and is forced to 1 when the magnitude exceeds
#    the width and srcElem1 is negative. The shift itself and the explicit
#    sign fill follow the same pattern as the plain register shift.
#  * Positive amount: shifts left, yielding 0 when the amount is at least
#    the element width.
#  * Zero: copies srcElem1.
2250 vrshlCode = '''
2251 int16_t shiftAmt = (int8_t)srcElem2;
2252 if (shiftAmt < 0) {
2253 shiftAmt = -shiftAmt;
2254 Element rBit = 0;
2255 if (shiftAmt <= sizeof(Element) * 8)
2256 rBit = bits(srcElem1, shiftAmt - 1);
2257 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2258 rBit = 1;
2259 if (shiftAmt >= sizeof(Element) * 8) {
2260 shiftAmt = sizeof(Element) * 8 - 1;
2261 destElem = 0;
2262 } else {
2263 destElem = (srcElem1 >> shiftAmt);
2264 }
2265 // Make sure the right shift sign extended when it should.
2266 if (ltz(srcElem1) && !ltz(destElem)) {
2267 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2268 1 - shiftAmt));
2269 }
2270 destElem += rBit;
2271 } else if (shiftAmt > 0) {
2272 if (shiftAmt >= sizeof(Element) * 8) {
2273 destElem = 0;
2274 } else {
2275 destElem = srcElem1 << shiftAmt;
2276 }
2277 } else {
2278 destElem = srcElem1;
2279 }
2280 '''
2281 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2282 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2283
# VQSHL (register, unsigned, saturating): the shift amount is srcElem2
# truncated to a signed 8-bit value.
#  * Negative amount: plain right shift by its magnitude (0 when the
#    magnitude is at least the element width); no saturation.
#  * Positive amount: if the amount is at least the element width and
#    srcElem1 is non-zero, or if any of the top shiftAmt bits of srcElem1
#    are set, the result saturates to mask(element width) and fpscr.qc is
#    set; otherwise the result is srcElem1 << shiftAmt.
#  * Zero: copies srcElem1.
# FpscrQc is read at the start and written back at the end.
2284 vqshlUCode = '''
2285 int16_t shiftAmt = (int8_t)srcElem2;
2286 FPSCR fpscr = (FPSCR) FpscrQc;
2287 if (shiftAmt < 0) {
2288 shiftAmt = -shiftAmt;
2289 if (shiftAmt >= sizeof(Element) * 8) {
2290 shiftAmt = sizeof(Element) * 8 - 1;
2291 destElem = 0;
2292 } else {
2293 destElem = (srcElem1 >> shiftAmt);
2294 }
2295 } else if (shiftAmt > 0) {
2296 if (shiftAmt >= sizeof(Element) * 8) {
2297 if (srcElem1 != 0) {
2298 destElem = mask(sizeof(Element) * 8);
2299 fpscr.qc = 1;
2300 } else {
2301 destElem = 0;
2302 }
2303 } else {
2304 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2305 sizeof(Element) * 8 - shiftAmt)) {
2306 destElem = mask(sizeof(Element) * 8);
2307 fpscr.qc = 1;
2308 } else {
2309 destElem = srcElem1 << shiftAmt;
2310 }
2311 }
2312 } else {
2313 destElem = srcElem1;
2314 }
2315 FpscrQc = fpscr;
2316 '''
2317 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2318 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2319
# VQSHL (register, signed, saturating): the shift amount is srcElem2
# truncated to a signed 8-bit value.
#  * Negative amount: right shift by its magnitude with explicit sign
#    fill when srcElem1 is negative but the shifted value is not.
#  * Positive amount: saturates when the amount is at least the element
#    width and srcElem1 is non-zero, or when the top (shiftAmt + 1) bits
#    of srcElem1 are not all equal to its sign. The saturated value is
#    mask(element width - 1), complemented when srcElem1 is negative, and
#    fpscr.qc is set. Otherwise the result is srcElem1 << shiftAmt.
#  * Zero: copies srcElem1.
# NOTE(review): these calls use the SimdCmpOp op class while the unsigned
# variant of the same mnemonic uses SimdAluOp -- confirm this is intended.
2320 vqshlSCode = '''
2321 int16_t shiftAmt = (int8_t)srcElem2;
2322 FPSCR fpscr = (FPSCR) FpscrQc;
2323 if (shiftAmt < 0) {
2324 shiftAmt = -shiftAmt;
2325 if (shiftAmt >= sizeof(Element) * 8) {
2326 shiftAmt = sizeof(Element) * 8 - 1;
2327 destElem = 0;
2328 } else {
2329 destElem = (srcElem1 >> shiftAmt);
2330 }
2331 // Make sure the right shift sign extended when it should.
2332 if (srcElem1 < 0 && destElem >= 0) {
2333 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2334 1 - shiftAmt));
2335 }
2336 } else if (shiftAmt > 0) {
2337 bool sat = false;
2338 if (shiftAmt >= sizeof(Element) * 8) {
2339 if (srcElem1 != 0)
2340 sat = true;
2341 else
2342 destElem = 0;
2343 } else {
2344 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2345 sizeof(Element) * 8 - 1 - shiftAmt) !=
2346 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2347 sat = true;
2348 } else {
2349 destElem = srcElem1 << shiftAmt;
2350 }
2351 }
2352 if (sat) {
2353 fpscr.qc = 1;
2354 destElem = mask(sizeof(Element) * 8 - 1);
2355 if (srcElem1 < 0)
2356 destElem = ~destElem;
2357 }
2358 } else {
2359 destElem = srcElem1;
2360 }
2361 FpscrQc = fpscr;
2362 '''
2363 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2364 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2365
2366 vqrshlUCode = '''
2367 int16_t shiftAmt = (int8_t)srcElem2;
2368 FPSCR fpscr = (FPSCR) FpscrQc;
2369 if (shiftAmt < 0) {
2370 shiftAmt = -shiftAmt;
2371 Element rBit = 0;
2372 if (shiftAmt <= sizeof(Element) * 8)
2373 rBit = bits(srcElem1, shiftAmt - 1);
2374 if (shiftAmt >= sizeof(Element) * 8) {
2375 shiftAmt = sizeof(Element) * 8 - 1;
2376 destElem = 0;
2377 } else {
2378 destElem = (srcElem1 >> shiftAmt);
2379 }
2380 destElem += rBit;
2381 } else {
2382 if (shiftAmt >= sizeof(Element) * 8) {
2383 if (srcElem1 != 0) {
2384 destElem = mask(sizeof(Element) * 8);
2385 fpscr.qc = 1;
2386 } else {
2387 destElem = 0;
2388 }
2389 } else {
2390 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2391 sizeof(Element) * 8 - shiftAmt)) {
2392 destElem = mask(sizeof(Element) * 8);
2393 fpscr.qc = 1;
2394 } else {
2395 destElem = srcElem1 << shiftAmt;
2396 }
2397 }
2398 }
2399 FpscrQc = fpscr;
2400 '''
2401 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2402 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2403
# VQRSHL (register, signed, rounding + saturating): the shift amount is
# srcElem2 truncated to a signed 8-bit value.
#  * Negative amount: right shift by its magnitude with explicit sign
#    fill, plus a rounding bit. rBit is bit (magnitude - 1) of srcElem1
#    when the magnitude is at most the element width, and is forced to 1
#    when the magnitude exceeds the width and srcElem1 is negative.
#  * Positive amount: saturates when the amount is at least the element
#    width and srcElem1 is non-zero, or when the top (shiftAmt + 1) bits
#    of srcElem1 are not all equal to its sign. The saturated value is
#    mask(element width - 1), complemented when srcElem1 is negative, and
#    fpscr.qc is set. Otherwise the result is srcElem1 << shiftAmt.
#  * Zero: copies srcElem1.
2404 vqrshlSCode = '''
2405 int16_t shiftAmt = (int8_t)srcElem2;
2406 FPSCR fpscr = (FPSCR) FpscrQc;
2407 if (shiftAmt < 0) {
2408 shiftAmt = -shiftAmt;
2409 Element rBit = 0;
2410 if (shiftAmt <= sizeof(Element) * 8)
2411 rBit = bits(srcElem1, shiftAmt - 1);
2412 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2413 rBit = 1;
2414 if (shiftAmt >= sizeof(Element) * 8) {
2415 shiftAmt = sizeof(Element) * 8 - 1;
2416 destElem = 0;
2417 } else {
2418 destElem = (srcElem1 >> shiftAmt);
2419 }
2420 // Make sure the right shift sign extended when it should.
2421 if (srcElem1 < 0 && destElem >= 0) {
2422 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2423 1 - shiftAmt));
2424 }
2425 destElem += rBit;
2426 } else if (shiftAmt > 0) {
2427 bool sat = false;
2428 if (shiftAmt >= sizeof(Element) * 8) {
2429 if (srcElem1 != 0)
2430 sat = true;
2431 else
2432 destElem = 0;
2433 } else {
2434 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2435 sizeof(Element) * 8 - 1 - shiftAmt) !=
2436 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2437 sat = true;
2438 } else {
2439 destElem = srcElem1 << shiftAmt;
2440 }
2441 }
2442 if (sat) {
2443 fpscr.qc = 1;
2444 destElem = mask(sizeof(Element) * 8 - 1);
2445 if (srcElem1 < 0)
2446 destElem = ~destElem;
2447 }
2448 } else {
2449 destElem = srcElem1;
2450 }
2451 FpscrQc = fpscr;
2452 '''
2453 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2454 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2455
# VABA: adds the absolute difference of the two source elements (larger
# minus smaller) to the existing destination element. The trailing True
# is presumably the generator's read-destination flag -- confirm.
2456 vabaCode = '''
2457 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2458 (srcElem2 - srcElem1);
2459 '''
2460 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2461 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# VABAL: like vabaCode, but both operands are cast to BigElement before
# the subtraction, and the difference is accumulated into destElem.
2462 vabalCode = '''
2463 destElem += (srcElem1 > srcElem2) ?
2464 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2465 ((BigElement)srcElem2 - (BigElement)srcElem1);
2466 '''
2467 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2468
# VABD (integer): each destination element is the absolute difference of
# the two source elements, computed as larger minus smaller.
2469 vabdCode = '''
2470 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2471 (srcElem2 - srcElem1);
2472 '''
2473 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2474 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# VABDL: absolute difference with both operands cast to BigElement before
# the subtraction.
2475 vabdlCode = '''
2476 destElem = (srcElem1 > srcElem2) ?
2477 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2478 ((BigElement)srcElem2 - (BigElement)srcElem1);
2479 '''
2480 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2481
# VTST: the destination element is (Element)(-1) when the two source
# elements have at least one set bit in common, otherwise 0.
2482 vtstCode = '''
2483 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2484 '''
2485 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2486 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2487
# VMUL (integer): element-wise product of the two sources.
2488 vmulCode = '''
2489 destElem = srcElem1 * srcElem2;
2490 '''
2491 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2492 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# VMULL (integer): both operands are cast to BigElement before the
# multiplication, so the product is formed at the wider type.
2493 vmullCode = '''
2494 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2495 '''
2496 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2497
# VMLA (integer): adds the product of the two source elements to the
# existing destination element. The trailing True is presumably the
# generator's read-destination flag -- confirm.
2498 vmlaCode = '''
2499 destElem = destElem + srcElem1 * srcElem2;
2500 '''
2501 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2502 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# VMLAL: like vmlaCode, but both operands are cast to BigElement before
# the multiplication.
2503 vmlalCode = '''
2504 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2505 '''
2506 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2507
# VQDMLAL: saturating doubling multiply-accumulate, in two stages.
#  1. midElem = 2 * srcElem1 * srcElem2, computed in int64_t. For the
#     operand pairs (maxNeg, maxNeg), (halfNeg, maxNeg) and
#     (maxNeg, halfNeg) it is replaced by
#     ~((BigElement)maxNeg << element width) and fpscr.qc is set.
#  2. midElem is added to destElem. If the previous destination and
#     midElem share a sign that differs from the sum's sign, the result
#     becomes mask(BigElement width - 1), complemented when the previous
#     destination was negative, and fpscr.qc is set.
# FpscrQc is read at the start and written back at the end.
2508 vqdmlalCode = '''
2509 FPSCR fpscr = (FPSCR) FpscrQc;
2510 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2511 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2512 Element halfNeg = maxNeg / 2;
2513 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2514 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2515 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2516 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2517 fpscr.qc = 1;
2518 }
2519 bool negPreDest = ltz(destElem);
2520 destElem += midElem;
2521 bool negDest = ltz(destElem);
2522 bool negMid = ltz(midElem);
2523 if (negPreDest == negMid && negMid != negDest) {
2524 destElem = mask(sizeof(BigElement) * 8 - 1);
2525 if (negPreDest)
2526 destElem = ~destElem;
2527 fpscr.qc = 1;
2528 }
2529 FpscrQc = fpscr;
2530 '''
2531 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2532
# VQDMLSL: saturating doubling multiply-subtract, in two stages.
#  1. midElem is computed and special-cased exactly as in the
#     accumulate variant (2 * srcElem1 * srcElem2 in int64_t, replaced
#     for the maxNeg/halfNeg operand pairs, with fpscr.qc set).
#  2. midElem is subtracted from destElem. posMid is the sign of the
#     negated midElem; if the previous destination shares that sign and
#     the difference's sign differs, the result becomes
#     mask(BigElement width - 1), complemented when the previous
#     destination was negative, and fpscr.qc is set.
2533 vqdmlslCode = '''
2534 FPSCR fpscr = (FPSCR) FpscrQc;
2535 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2536 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2537 Element halfNeg = maxNeg / 2;
2538 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2539 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2540 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2541 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2542 fpscr.qc = 1;
2543 }
2544 bool negPreDest = ltz(destElem);
2545 destElem -= midElem;
2546 bool negDest = ltz(destElem);
2547 bool posMid = ltz((BigElement)-midElem);
2548 if (negPreDest == posMid && posMid != negDest) {
2549 destElem = mask(sizeof(BigElement) * 8 - 1);
2550 if (negPreDest)
2551 destElem = ~destElem;
2552 fpscr.qc = 1;
2553 }
2554 FpscrQc = fpscr;
2555 '''
2556 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2557
# VQDMULL: destElem = 2 * srcElem1 * srcElem2, computed in int64_t. When
# both sources equal 1 << (element width - 1), the result is replaced by
# ~((BigElement)srcElem1 << element width) and fpscr.qc is set.
2558 vqdmullCode = '''
2559 FPSCR fpscr = (FPSCR) FpscrQc;
2560 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2561 if (srcElem1 == srcElem2 &&
2562 srcElem1 == (Element)((Element)1 <<
2563 (Element)(sizeof(Element) * 8 - 1))) {
2564 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2565 fpscr.qc = 1;
2566 }
2567 FpscrQc = fpscr;
2568 '''
2569 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2570
# VMLS (integer): subtracts the product of the two source elements from
# the existing destination element.
2571 vmlsCode = '''
2572 destElem = destElem - srcElem1 * srcElem2;
2573 '''
2574 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2575 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# VMLSL: like vmlsCode, but both operands are cast to BigElement before
# the multiplication.
2576 vmlslCode = '''
2577 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2578 '''
2579 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2580
# Polynomial VMUL: for every set bit j of srcElem2, XORs (srcElem1 << j)
# into the result, i.e. a multiply whose partial products are combined
# with XOR instead of addition. Registered under the "vmul" mnemonic with
# class names NVmulpD and NVmulpQ.
2581 vmulpCode = '''
2582 destElem = 0;
2583 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2584 if (bits(srcElem2, j))
2585 destElem ^= srcElem1 << j;
2586 }
2587 '''
2588 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2589 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Polynomial VMULL: same XOR-of-shifted-partial-products scheme as the
# polynomial VMUL, but srcElem1 is cast to BigElement before shifting so
# no product bits are lost. Registered under the "vmull" mnemonic.
2590 vmullpCode = '''
2591 destElem = 0;
2592 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2593 if (bits(srcElem2, j))
2594 destElem ^= (BigElement)srcElem1 << j;
2595 }
2596 '''
2597 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2598
# VPMAX (integer) reuses the integer vmaxCode snippet with pairwise=True;
# only a D-named class is generated.
2599 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2600
# VPMIN (integer) reuses the integer vminCode snippet with pairwise=True;
# only a D-named class is generated.
2601 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2602
# VQDMULH: destElem = (2 * srcElem1 * srcElem2) >> element width, with the
# product formed in int64_t. When both sources equal
# 1 << (element width - 1), the result is replaced by ~srcElem1 and
# fpscr.qc is set.
2603 vqdmulhCode = '''
2604 FPSCR fpscr = (FPSCR) FpscrQc;
2605 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2606 (sizeof(Element) * 8);
2607 if (srcElem1 == srcElem2 &&
2608 srcElem1 == (Element)((Element)1 <<
2609 (sizeof(Element) * 8 - 1))) {
2610 destElem = ~srcElem1;
2611 fpscr.qc = 1;
2612 }
2613 FpscrQc = fpscr;
2614 '''
2615 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2616 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2617
# VQRDMULH: like VQDMULH but adds the rounding constant
# 1 << (element width - 1) to the doubled product before the right shift.
# For the operand pairs (maxNeg, maxNeg), (halfNeg, maxNeg) and
# (maxNeg, halfNeg) the result is overridden: mask(element width - 1) if
# the computed value is negative, otherwise 1 << (element width - 1);
# fpscr.qc is set in both cases.
2618 vqrdmulhCode = '''
2619 FPSCR fpscr = (FPSCR) FpscrQc;
2620 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2621 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2622 (sizeof(Element) * 8);
2623 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2624 Element halfNeg = maxNeg / 2;
2625 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2626 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2627 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2628 if (destElem < 0) {
2629 destElem = mask(sizeof(Element) * 8 - 1);
2630 } else {
2631 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2632 }
2633 fpscr.qc = 1;
2634 }
2635 FpscrQc = fpscr;
2636 '''
2637 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2638 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2639 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2640 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2641
# VMAX (float): first lets processNans() produce a result; it reports
# through `done` whether it handled the operands. If not done, the result
# comes from binaryOp() with fpMax<float>. If done and flushToZero() on
# the two sources returns true, fpscr.idc is set. FpscrExc is read at the
# start and written back at the end.
2642 vmaxfpCode = '''
2643 FPSCR fpscr = (FPSCR) FpscrExc;
2644 bool done;
2645 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2646 if (!done) {
2647 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2648 true, true, VfpRoundNearest);
2649 } else if (flushToZero(srcReg1, srcReg2)) {
2650 fpscr.idc = 1;
2651 }
2652 FpscrExc = fpscr;
2653 '''
2654 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2655 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2656
# VMIN (float): same structure as the float VMAX snippet, but the
# fallback binaryOp() uses fpMin<float>. processNans() runs first; when it
# reports done and flushToZero() on the sources returns true, fpscr.idc
# is set.
2657 vminfpCode = '''
2658 FPSCR fpscr = (FPSCR) FpscrExc;
2659 bool done;
2660 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2661 if (!done) {
2662 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2663 true, true, VfpRoundNearest);
2664 } else if (flushToZero(srcReg1, srcReg2)) {
2665 fpscr.idc = 1;
2666 }
2667 FpscrExc = fpscr;
2668 '''
2669 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2670 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2671
# VPMAX (float): reuses vmaxfpCode with pairwise=True for the D- and
# Q-named classes.
2672 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2673 2, vmaxfpCode, pairwise=True)
2674 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2675 4, vmaxfpCode, pairwise=True)
2676
# VPMIN (float): reuses vminfpCode with pairwise=True for the D- and
# Q-named classes.
2677 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2678 2, vminfpCode, pairwise=True)
2679 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2680 4, vminfpCode, pairwise=True)
2681
# VADD (float): destReg is the result of binaryOp() with fpAddS on the two
# source registers, using VfpRoundNearest. FpscrExc is read into a local
# FPSCR, passed to binaryOp(), and written back.
2682 vaddfpCode = '''
2683 FPSCR fpscr = (FPSCR) FpscrExc;
2684 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2685 true, true, VfpRoundNearest);
2686 FpscrExc = fpscr;
2687 '''
2688 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2689 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2690
# VPADD (float): reuses vaddfpCode with pairwise=True.
2691 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2692 2, vaddfpCode, pairwise=True)
2693 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2694 4, vaddfpCode, pairwise=True)
2695
# VSUB (float): destReg is the result of binaryOp() with fpSubS on the two
# source registers, using VfpRoundNearest.
2696 vsubfpCode = '''
2697 FPSCR fpscr = (FPSCR) FpscrExc;
2698 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2699 true, true, VfpRoundNearest);
2700 FpscrExc = fpscr;
2701 '''
2702 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2703 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2704
# VMUL (float): destReg is the result of binaryOp() with fpMulS on the two
# source registers, using VfpRoundNearest.
2705 vmulfpCode = '''
2706 FPSCR fpscr = (FPSCR) FpscrExc;
2707 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2708 true, true, VfpRoundNearest);
2709 FpscrExc = fpscr;
2710 '''
2711 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2712 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2713
# VMLA (float): two separate binaryOp() steps -- first the product of the
# sources via fpMulS into `mid`, then mid + destReg via fpAddS. The same
# local fpscr is threaded through both calls and written back to FpscrExc.
2714 vmlafpCode = '''
2715 FPSCR fpscr = (FPSCR) FpscrExc;
2716 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2717 true, true, VfpRoundNearest);
2718 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2719 true, true, VfpRoundNearest);
2720 FpscrExc = fpscr;
2721 '''
2722 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2723 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2724
# VFMA (float): a single ternaryOp() call with fpMulAdd<float> over
# srcReg1, srcReg2 and the existing destReg, using VfpRoundNearest.
2725 vfmafpCode = '''
2726 FPSCR fpscr = (FPSCR) FpscrExc;
2727 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2728 true, true, VfpRoundNearest);
2729 FpscrExc = fpscr;
2730 '''
2731 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2732 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2733
# VFMS (float): same ternaryOp()/fpMulAdd<float> call as the VFMA snippet,
# except that the first operand is passed negated (-srcReg1).
2734 vfmsfpCode = '''
2735 FPSCR fpscr = (FPSCR) FpscrExc;
2736 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2737 true, true, VfpRoundNearest);
2738 FpscrExc = fpscr;
2739 '''
2740 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2741 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2742
# VMLS (float): two separate binaryOp() steps -- first the product of the
# sources via fpMulS into `mid`, then destReg - mid via fpSubS.
2743 vmlsfpCode = '''
2744 FPSCR fpscr = (FPSCR) FpscrExc;
2745 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2746 true, true, VfpRoundNearest);
2747 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2748 true, true, VfpRoundNearest);
2749 FpscrExc = fpscr;
2750 '''
2751 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2752 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2753
# VCGT (float): runs binaryOp() with vcgtFunc and maps its float result:
# 0 produces -1 in destReg, anything else produces 0; a result of 2.0
# additionally sets fpscr.ioc. The calls pass toInt = True.
# NOTE(review): presumably vcgtFunc returns 0 for "true" and 2.0 to flag
# an invalid operand -- confirm against its definition.
2754 vcgtfpCode = '''
2755 FPSCR fpscr = (FPSCR) FpscrExc;
2756 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2757 true, true, VfpRoundNearest);
2758 destReg = (res == 0) ? -1 : 0;
2759 if (res == 2.0)
2760 fpscr.ioc = 1;
2761 FpscrExc = fpscr;
2762 '''
2763 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2764 2, vcgtfpCode, toInt = True)
2765 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2766 4, vcgtfpCode, toInt = True)
2767
# VCGE (float): runs binaryOp() with vcgeFunc and maps its float result:
# 0 produces -1 in destReg, anything else produces 0; a result of 2.0
# additionally sets fpscr.ioc. The calls pass toInt = True.
2768 vcgefpCode = '''
2769 FPSCR fpscr = (FPSCR) FpscrExc;
2770 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2771 true, true, VfpRoundNearest);
2772 destReg = (res == 0) ? -1 : 0;
2773 if (res == 2.0)
2774 fpscr.ioc = 1;
2775 FpscrExc = fpscr;
2776 '''
2777 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2778 2, vcgefpCode, toInt = True)
2779 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2780 4, vcgefpCode, toInt = True)
2781
# VACGT (float): runs binaryOp() with vacgtFunc and maps its float result:
# 0 produces -1 in destReg, anything else produces 0; a result of 2.0
# additionally sets fpscr.ioc. The calls pass toInt = True.
2782 vacgtfpCode = '''
2783 FPSCR fpscr = (FPSCR) FpscrExc;
2784 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2785 true, true, VfpRoundNearest);
2786 destReg = (res == 0) ? -1 : 0;
2787 if (res == 2.0)
2788 fpscr.ioc = 1;
2789 FpscrExc = fpscr;
2790 '''
2791 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2792 2, vacgtfpCode, toInt = True)
2793 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2794 4, vacgtfpCode, toInt = True)
2795
# VACGE (float): runs binaryOp() with vacgeFunc and maps its float result:
# 0 produces -1 in destReg, anything else produces 0; a result of 2.0
# additionally sets fpscr.ioc. The calls pass toInt = True.
2796 vacgefpCode = '''
2797 FPSCR fpscr = (FPSCR) FpscrExc;
2798 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2799 true, true, VfpRoundNearest);
2800 destReg = (res == 0) ? -1 : 0;
2801 if (res == 2.0)
2802 fpscr.ioc = 1;
2803 FpscrExc = fpscr;
2804 '''
2805 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2806 2, vacgefpCode, toInt = True)
2807 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2808 4, vacgefpCode, toInt = True)
2809
# VCEQ (float): runs binaryOp() with vceqFunc and maps its float result:
# 0 produces -1 in destReg, anything else produces 0; a result of 2.0
# additionally sets fpscr.ioc. The calls pass toInt = True.
2810 vceqfpCode = '''
2811 FPSCR fpscr = (FPSCR) FpscrExc;
2812 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2813 true, true, VfpRoundNearest);
2814 destReg = (res == 0) ? -1 : 0;
2815 if (res == 2.0)
2816 fpscr.ioc = 1;
2817 FpscrExc = fpscr;
2818 '''
2819 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2820 2, vceqfpCode, toInt = True)
2821 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2822 4, vceqfpCode, toInt = True)
2823
# VRECPS: destReg is the result of binaryOp() with fpRecpsS on the two
# source registers, using VfpRoundNearest.
2824 vrecpsCode = '''
2825 FPSCR fpscr = (FPSCR) FpscrExc;
2826 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2827 true, true, VfpRoundNearest);
2828 FpscrExc = fpscr;
2829 '''
2830 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2831 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2832
# VRSQRTS: destReg is the result of binaryOp() with fpRSqrtsS on the two
# source registers, using VfpRoundNearest.
2833 vrsqrtsCode = '''
2834 FPSCR fpscr = (FPSCR) FpscrExc;
2835 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2836 true, true, VfpRoundNearest);
2837 FpscrExc = fpscr;
2838 '''
2839 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2840 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2841
# VABD (float): computes srcReg1 - srcReg2 through binaryOp() with fpSubS,
# then stores fabs() of that difference.
2842 vabdfpCode = '''
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2845 true, true, VfpRoundNearest);
2846 destReg = fabs(mid);
2847 FpscrExc = fpscr;
2848 '''
2849 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2850 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2851
# VMLA variants built with the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators, reusing the vmlaCode, vmlafpCode and
# vmlalCode snippets. The class names carry an "s" suffix -- presumably
# the by-scalar encodings; confirm against the generators.
# NOTE(review): the integer form here is instantiated over unsignedTypes,
# whereas the matching vmls/vmul calls in this group use allTypes --
# confirm this matches what the decoder instantiates.
2852 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2853 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2854 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2855 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2856 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2857
# VMLS variants built with the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators, reusing the vmlsCode, vmlsfpCode and
# vmlslCode snippets. The "s"-suffixed class names are presumably the
# by-scalar encodings -- confirm against the generators.
2858 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2859 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2860 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2861 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2862 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2863
# VMUL variants built with the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators, reusing the vmulCode, vmulfpCode and
# vmullCode snippets. The "s"-suffixed class names are presumably the
# by-scalar encodings -- confirm against the generators.
2864 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2865 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2866 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2867 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2868 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2869
# Saturating doubling multiply variants built with the twoRegLongInst /
# twoEqualRegInst generators, reusing the vqdmullCode, vqdmlalCode,
# vqdmlslCode, vqdmulhCode and vqrdmulhCode snippets. The "s"-suffixed
# class names are presumably the by-scalar encodings -- confirm.
2870 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2871 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2872 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2873 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2874 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2875 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2876 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2877 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2878 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2879
# VSHR (immediate): when imm is at least the element width the result is
# -1 for a negative source (ltz) and 0 otherwise; for smaller imm the
# result is srcElem1 >> imm.
2880 vshrCode = '''
2881 if (imm >= sizeof(srcElem1) * 8) {
2882 if (ltz(srcElem1))
2883 destElem = -1;
2884 else
2885 destElem = 0;
2886 } else {
2887 destElem = srcElem1 >> imm;
2888 }
2889 '''
2890 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2891 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2892
2893 vsraCode = '''
2894 Element mid;;
2895 if (imm >= sizeof(srcElem1) * 8) {
2896 mid = ltz(srcElem1) ? -1 : 0;
2897 } else {
2898 mid = srcElem1 >> imm;
2899 if (ltz(srcElem1) && !ltz(mid)) {
2900 mid |= -(mid & ((Element)1 <<
2901 (sizeof(Element) * 8 - 1 - imm)));
2902 }
2903 }
2904 destElem += mid;
2905 '''
2906 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2907 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2908
# VRSHR (immediate, rounding):
#  * imm greater than the element width: result is 0.
#  * Non-zero imm: rBit is bit (imm - 1) of the source; the shift is done
#    in two steps, (imm - 1) then 1, so that imm equal to the element
#    width never shifts by the full width in one operation; rBit is then
#    added.
#  * imm == 0: copies srcElem1.
2909 vrshrCode = '''
2910 if (imm > sizeof(srcElem1) * 8) {
2911 destElem = 0;
2912 } else if (imm) {
2913 Element rBit = bits(srcElem1, imm - 1);
2914 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2915 } else {
2916 destElem = srcElem1;
2917 }
2918 '''
2919 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2920 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2921
# VRSRA (immediate, rounding, accumulating): same three cases as the
# rounding right shift, but the value is added to the existing
# destination element instead of replacing it. For imm greater than the
# element width the snippet adds 0, leaving destElem unchanged.
2922 vrsraCode = '''
2923 if (imm > sizeof(srcElem1) * 8) {
2924 destElem += 0;
2925 } else if (imm) {
2926 Element rBit = bits(srcElem1, imm - 1);
2927 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2928 } else {
2929 destElem += srcElem1;
2930 }
2931 '''
2932 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2933 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2934
# VSRI (shift right and insert): for imm at least the element width the
# destination is left as it is (self-assignment). Otherwise the source is
# shifted right by imm and ORed with the destination bits that lie outside
# mask(element width - imm), i.e. the top imm bits of destElem survive.
2935 vsriCode = '''
2936 if (imm >= sizeof(Element) * 8)
2937 destElem = destElem;
2938 else
2939 destElem = (srcElem1 >> imm) |
2940 (destElem & ~mask(sizeof(Element) * 8 - imm));
2941 '''
2942 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2943 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2944
# VSHL (immediate). This rebinds the name vshlCode, which earlier in the
# file held the register-shift snippet; those earlier generator calls have
# already been made with the old value.
# For imm at least the element width the shift is split into
# (width - 1) followed by 1, so no single shift uses the full width;
# otherwise the source is shifted left by imm.
2945 vshlCode = '''
2946 if (imm >= sizeof(Element) * 8)
2947 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2948 else
2949 destElem = srcElem1 << imm;
2950 '''
2951 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2952 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2953
# VSLI (shift left and insert): for imm at least the element width the
# destination is left as it is (self-assignment). Otherwise the source is
# shifted left by imm and ORed with the low imm bits of the existing
# destination (destElem & mask(imm)).
2954 vsliCode = '''
2955 if (imm >= sizeof(Element) * 8)
2956 destElem = destElem;
2957 else
2958 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2959 '''
2960 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2961 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2962
# vqshl (immediate, signed element types): left shift with saturation.
# The bits from the top of the element down to (width - 1 - imm) are
# inspected; unless they are all zero or equal to mask(imm + 1) the result
# is replaced by the top-bit-only value (or its complement for positive
# sources) and fpscr.qc is set.  The updated fpscr is written to FpscrQc.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes,
                rCount=2, op=vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes,
                rCount=4, op=vqshlCode)
2992
# vqshlu (registered for the unsigned element types): left shift with
# saturation.  If any of the top imm bits of the source are set, the result
# becomes mask(width) and fpscr.qc is set.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes,
                rCount=2, op=vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes,
                rCount=4, op=vqshluCode)
3018
# vqshlus (registered for the signed element types): left shift whose result
# is clamped to the range [0, mask(width)].  Negative sources produce 0 and
# set fpscr.qc for every shift amount, including 0; non-negative sources
# saturate to mask(width) when any of the top imm bits are set.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes,
                rCount=2, op=vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes,
                rCount=4, op=vqshlusCode)
3055
# vshrn: right shift of a wide source element, stored into a narrow
# destination element.  A shift of at least the source width yields 0.
vshrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, op=vshrnCode)
3064
# vrshrn: right shift of a wide source element with the last bit shifted out
# added back, stored into a narrow destination element.
vrshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, op=vrshrnCode)
3076
# vqshrn (signed element types): narrowing right shift with saturation.
# After the shift, the bit at position (wide width - 1 - imm) is propagated
# upwards by the "mid |= -(...)" step.  If the value does not survive a
# round trip through the narrow Element type, the destination becomes
# mask(width - 1) (complemented for negative sources) and fpscr.qc is set.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp",
                      smallSignedTypes, op=vqshrnCode)
3101
# vqshrun registered for the unsigned element types (class NVqshrun):
# narrowing right shift; when the shifted value does not fit the narrow
# Element type the destination becomes mask(width) and fpscr.qc is set.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun", "SimdShiftOp",
                      smallUnsignedTypes, op=vqshrunCode)
3123
# vqshrun registered for the signed element types (class NVqshruns):
# narrowing right shift whose result is clamped to [0, mask(width)].  Any
# bit set above the narrow width after the shift triggers saturation: 0 for
# negative sources, mask(width) otherwise, with fpscr.qc set.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns", "SimdShiftOp",
                      smallSignedTypes, op=vqshrunsCode)
3150
# vqrshrn (signed element types): narrowing right shift that adds back the
# last bit shifted out, then saturates.  When the result does not fit the
# narrow Element type the destination becomes mask(width - 1) (complemented
# for negative sources) and fpscr.qc is set.  The imm == 0 path applies the
# same saturation to the unshifted source.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", "SimdShiftOp",
                      smallSignedTypes, op=vqrshrnCode)
3186
# vqrshrun registered for the unsigned element types (class NVqrshrun):
# narrowing right shift that adds back the last bit shifted out, then
# saturates.  When the result does not fit the narrow Element type the
# destination becomes mask(width) and fpscr.qc is set.
#
# The imm == 0 path saturates to the same mask(width) value as the
# imm != 0 path.  It previously used mask(width - 1), which is the signed
# maximum and is inconsistent for an unsigned destination.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3216
# vqrshrun registered for the signed element types (class NVqrshruns):
# narrowing right shift that adds back the last bit shifted out and clamps
# the result to [0, mask(width)].  Negative sources saturate to 0 (also for
# imm == 0); results with bits set above the narrow width saturate to
# mask(width).  fpscr.qc is set whenever saturation happens.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", "SimdShiftOp",
                      smallSignedTypes, op=vqrshrunsCode)
3253
# vshll: widen the source element to BigElement and shift it left by imm.
# A shift of at least the destination width yields 0.
vshllCode = '''
    if (imm >= sizeof(destElem) * 8) {
        destElem = 0;
    } else {
        destElem = (BigElement)srcElem1 << imm;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes,
                    op=vshllCode)
3262
# vmovl: copy each narrow source element into a wide destination element.
# It reuses the long-shift generator; the snippet ignores imm.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes,
                    op=vmovlCode)
3267
# vcvt, float -> fixed point, via vfpFpToFixed<float>(src, false, 32, imm).
# NOTE(review): the "false" argument presumably selects an unsigned result
# (the sibling snippet passes "true") -- confirm against vfpFpToFixed.
# fpscr.idc is set when flushToZero() reports true for the source.  The
# empty asm statements with memory operands bracket the conversion;
# presumably they keep the compiler from moving it across
# prepFpState()/finishVfp() -- confirm.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
                rCount=2, op=vcvt2ufxCode, toInt=True)
twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
                rCount=4, op=vcvt2ufxCode, toInt=True)
3283
# vcvt, float -> fixed point, via vfpFpToFixed<float>(src, true, 32, imm).
# NOTE(review): the "true" argument presumably selects a signed result --
# confirm against vfpFpToFixed.
# fpscr.idc is set when flushToZero() reports true for the source.
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
                rCount=2, op=vcvt2sfxCode, toInt=True)
twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
                rCount=4, op=vcvt2sfxCode, toInt=True)
3299
# vcvt, fixed point -> float, via vfpUFixedToFpS(true, true, src, 32, imm).
# The source is read as raw register bits (fromInt=True).
# NOTE(review): the meaning of the two leading "true" arguments is not
# visible here -- see vfpUFixedToFpS.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
                rCount=2, op=vcvtu2fpCode, fromInt=True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
                rCount=4, op=vcvtu2fpCode, fromInt=True)
3313
# vcvt, fixed point -> float, via vfpSFixedToFpS(true, true, src, 32, imm).
# The source is read as raw register bits (fromInt=True).
# NOTE(review): the meaning of the two leading "true" arguments is not
# visible here -- see vfpSFixedToFpS.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
                rCount=2, op=vcvts2fpCode, fromInt=True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
                rCount=4, op=vcvts2fpCode, fromInt=True)
3327
# vcvt, narrowing conversion through vcvtFpSFpH().  The wide source element
# is reinterpreted as a float with bitsToFp(); fpscr.idc is set when
# flushToZero() reports true for it.  The destination is registered with
# uint16_t elements.
vcvts2hCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp",
                     ("uint16_t",), vcvts2hCode)
3344
# vcvt, widening conversion through vcvtFpHFpS().  The float result is
# turned back into raw bits with fpToBits() before being stored in the wide
# destination element.  The source is registered with uint16_t elements.
vcvth2sCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp",
                   ("uint16_t",), vcvth2sCode)
3357
# vrsqrte on uint32_t elements: delegates to unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",),
               rCount=2, op=vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",),
               rCount=4, op=vrsqrteCode)
3363
# vrsqrte on float registers: delegates to fprSqrtEstimate().  fpscr.idc is
# set when flushToZero() reports true for the source register, and the
# updated fpscr is written back to FpscrExc.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp",
                 ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp",
                 ("float",), 4, vrsqrtefpCode)
3373
# vrecpe on uint32_t elements: delegates to unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",),
               rCount=2, op=vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",),
               rCount=4, op=vrecpeCode)
3379
# vrecpe on float registers: delegates to fpRecipEstimate().  fpscr.idc is
# set when flushToZero() reports true for the source register, and the
# updated fpscr is written back to FpscrExc.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp",
                 ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp",
                 ("float",), 4, vrecpefpCode)
3389
# vrev16: reverse the order of elements inside each 2-byte group.  The
# snippet copies the source element and redirects the store by rewriting the
# destination index j (declared by twoRegMiscInst) to i XOR
# (elements-per-group - 1).
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",),
               rCount=2, op=vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",),
               rCount=4, op=vrev16Code)
# vrev32: reverse the order of elements inside each 4-byte group by
# rewriting the destination index j to i XOR (elements-per-group - 1).
vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev32", "NVrev32D", "SimdAluOp",
               ("uint8_t", "uint16_t"), rCount=2, op=vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q", "SimdAluOp",
               ("uint8_t", "uint16_t"), rCount=4, op=vrev32Code)
# vrev64: reverse the order of elements inside each 8-byte group by
# rewriting the destination index j to i XOR (elements-per-group - 1).
vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes,
               rCount=2, op=vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes,
               rCount=4, op=vrev64Code)
3416
# Element type lists handed to the instruction generators as their "types"
# argument.  The "small" tuples stop at 32 bits; the full tuples append the
# 64-bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1133
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    # Generate a three-operand instruction whose two sources and destination
    # all span rCount registers.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   op         -- C++ snippet run per element; it sees srcElem1,
    #                 srcElem2 and destElem
    #   readDest   -- also load the destination before running op
    #   pairwise   -- feed op adjacent element pairs taken from srcReg1
    #                 first and then srcReg2, instead of same-index elements
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output

    # Per-register load code; the destination is loaded only on request.
    loadRegs = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    if readDest:
        loadRegs += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeRegs = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    # Only the way the two source elements are fetched depends on pairwise.
    if pairwise:
        fetchSrc = '''
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
        '''
    else:
        fetchSrc = '''
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
        '''
    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    elemLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        %(fetchSrc)s
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "fetchSrc" : fetchSrc, "op" : op, "readDest" : readDestCode }

    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    eWalkCode += ''.join([loadRegs % { "reg" : idx }
                          for idx in range(rCount)])
    eWalkCode += elemLoop
    eWalkCode += ''.join([storeRegs % { "reg" : idx }
                          for idx in range(rCount)])

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1194
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    # Generate a three-operand floating point instruction that works on
    # whole registers (srcReg1, srcReg2, destReg) rather than on elements.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- types to instantiate the class for
    #   rCount     -- number of registers per operand
    #   op         -- C++ snippet run per register
    #   readDest   -- also load the destination before running op
    #   pairwise   -- feed op adjacent register pairs taken from srcRegs1
    #                 first and then srcRegs2
    #   toInt      -- destination holds raw bits (FloatRegBits in a RegVect)
    #                 instead of FloatReg values
    #
    # With toInt set, the destination is a RegVect and is always accessed
    # through its .regs[] member and the _uw operand view, matching the
    # write-back code.  The readDest path previously emitted
    # "destReg = destRegs[r];" and "FpDestPn.bits" for this case, which does
    # not agree with the write-back path.
    # NOTE(review): no caller visible in this part of the file combines
    # toInt with readDest, so this path looks unexercised -- confirm.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs[r]'
        loadDest = '''
            destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
        '''
        storeDest = '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        '''
    else:
        eWalkCode += 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        destSlot = 'destRegs[r]'
        loadDest = '''
            destRegs[%(reg)d] = FpDestP%(reg)d;
        '''
        storeDest = '''
            FpDestP%(reg)d = destRegs[%(reg)d];
        '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += loadDest % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        eWalkCode += storeDest % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1279
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    # Generate a three-operand instruction whose operands may differ in
    # width.  Each big* flag selects the "Big" variant of an operand: 4
    # registers and the BigRegVect type instead of 2 registers and RegVect.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   op         -- C++ snippet run per element
    #   readDest   -- also load the destination before running op
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output

    def operandShape(isBig):
        # (register count, type name prefix) for one operand.
        if isBig:
            return 4, 'Big'
        return 2, ''

    src1Cnt, src1Prefix = operandShape(bigSrc1)
    src2Cnt, src2Prefix = operandShape(bigSrc2)
    destCnt, destPrefix = operandShape(bigDest)

    eWalkCode = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    eWalkCode += ''.join(['''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx } for idx in range(src1Cnt)])
    eWalkCode += ''.join(['''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx } for idx in range(src2Cnt)])
    readDestCode = ''
    if readDest:
        eWalkCode += ''.join(['''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : idx } for idx in range(destCnt)])
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # NOTE(review): srcElem2 is declared with the src1 prefix, so it takes
    # the width of the first source; src2Prefix is passed in but not used by
    # the template.  Kept as found -- confirm whether this is intentional.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    eWalkCode += ''.join(['''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx } for idx in range(destCnt)])

    # r_count is fixed at 2 regardless of the operand widths.
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1343
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    # Narrowing form: both sources use the "Big" (4 register) layout, the
    # destination uses the regular (2 register) layout.
    bigSrc1, bigSrc2, bigDest = True, True, False
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
1347
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Long form: both sources use the regular (2 register) layout, the
    # destination uses the "Big" (4 register) layout.
    bigSrc1, bigSrc2, bigDest = False, False, True
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
1351
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    # Wide form: the first source and the destination use the "Big"
    # (4 register) layout, the second source uses the regular layout.
    bigSrc1, bigSrc2, bigDest = True, False, True
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest)
1355
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate an instruction that combines every element of the first
    # source with ONE element of the second source, selected by imm.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   rCount     -- number of registers per operand
    #   op         -- C++ snippet run per element; it sees srcElem1,
    #                 srcElem2 and destElem
    #   readDest   -- also load the destination before running op
    #
    # The generated code raises UndefinedInstruction when imm is outside
    # [0, eCount).  The two range tests are joined with "||"; the previous
    # "&&" could never be true, so an out-of-range imm indexed past the end
    # of srcReg2.elements.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per element type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1403
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Generate an instruction that combines every element of the first
    # source with ONE element of the second source (selected by imm) and
    # writes wide (BigElement) results.  Sources span 2 registers, the
    # destination spans 4.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   op         -- C++ snippet run per element
    #   readDest   -- also load the destination before running op
    #
    # The generated code raises UndefinedInstruction when imm is outside
    # [0, eCount).  The two range tests are joined with "||"; the previous
    # "&&" could never be true.  A stray empty statement (";;") in the
    # source register load was also removed.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per element type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1454
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    # Floating point counterpart of the "by scalar" generator: every
    # register of the first source is combined with ONE register of the
    # second source, selected by imm.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- types to instantiate the class for
    #   rCount     -- number of registers per operand
    #   op         -- C++ snippet run per register; it sees srcReg1,
    #                 srcReg2 and destReg
    #   readDest   -- also load the destination before running op
    #
    # The generated code raises UndefinedInstruction when imm is outside
    # [0, eCount).  The two range tests are joined with "||"; the previous
    # "&&" could never be true, so an out-of-range imm indexed past the end
    # of srcRegs2.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = new UndefinedInstruction(machInst, false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1503
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    # Generate a two-register instruction with an immediate operand (imm is
    # referenced by the op snippets), with source and destination of equal
    # size.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   rCount     -- number of registers per operand
    #   op         -- C++ snippet run per element
    #   readDest   -- also load the destination before running op
    #   toInt      -- op writes raw register bits to "destReg" instead of an
    #                 Element to "destElem"
    #   fromInt    -- op reads raw register bits from "srcReg1" instead of
    #                 an Element from "srcElem1"
    #
    # The per-iteration read of the destination is emitted only when
    # readDest is set.  destRegs is loaded from the register file only in
    # that case, so reading it otherwise would read an uninitialized local.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    # Declaration, read-back and write-back must all agree on whether the
    # destination is handled as raw register bits or as an Element.
    if toInt:
        declDest = 'FloatRegBits destReg;'
        destReadBack = 'destReg = gtoh(destRegs.regs[i]);'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        destReadBack = 'destElem = gtoh(destRegs.elements[i]);'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    readDestCode = ''
    if readDest:
        readDestCode = destReadBack
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for reg in range(rCount):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit instantiation per element type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1560
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    # Generate a two-register instruction with an immediate operand that
    # reads wide (BigElement) source elements from 4 registers and writes
    # narrow (Element) results to 2 registers.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   op         -- C++ snippet run per element; it sees srcElem1 and
    #                 destElem
    #   readDest   -- also load the destination before running op
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2

    eWalkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    eWalkCode += ''.join(['''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx } for idx in range(srcRegCount)])
    readDestCode = ''
    if readDest:
        eWalkCode += ''.join(['''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : idx } for idx in range(destRegCount)])
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    eWalkCode += ''.join(['''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx } for idx in range(destRegCount)])

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1604
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    # Generate a two-register instruction with an immediate operand that
    # reads narrow (Element) source elements from 2 registers and writes
    # wide (BigElement) results to 4 registers.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   op         -- C++ snippet run per element; it sees srcElem1 and
    #                 destElem
    #   readDest   -- also load the destination before running op
    #
    # With readDest set, the current destination element is loaded into
    # destElem.  The previous code assigned it to destReg, which is the
    # whole BigRegVect rather than the per-element variable.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit instantiation per element type.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1648
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a two-register instruction without an immediate, with source
    # and destination of equal size.
    #
    #   name, Name -- mnemonic and generated class name
    #   opClass    -- op class string stored in the InstObjParams
    #   types      -- element types to instantiate the class for
    #   rCount     -- number of registers per operand
    #   op         -- C++ snippet run per element; it sees srcElem1,
    #                 destElem, the loop index i and the store index j
    #   readDest   -- also load the destination before running op
    #
    # The result is stored at index j, which starts out equal to i; an op
    # snippet may change j to redirect the store to another element.
    #
    # Appends to the global header_output and exec_output.
    global header_output, exec_output

    # Per-register load code; the destination is loaded only on request.
    loadRegs = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    '''
    readDestCode = ''
    if readDest:
        loadRegs += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    storeRegs = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''

    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    eWalkCode += ''.join([loadRegs % { "reg" : idx }
                          for idx in range(rCount)])
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    eWalkCode += ''.join([storeRegs % { "reg" : idx }
                          for idx in range(rCount)])

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1691
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a NEON instruction that reads one fixed source element,
    # srcReg1.elements[imm], on every iteration and writes each destination
    # element in turn.  Source and destination both span rCount registers.
    # With readDest set, the old destination registers are loaded and
    # destElem is preloaded from them before op runs.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
        RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            pieces.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    for idx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    fields = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegImmOp", fields, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1733
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a NEON instruction that rearranges data between two
    # vectors.  Both the operand and the destination registers are loaded
    # up front, op is inserted verbatim (it is not wrapped in a
    # per-element loop), and both vectors are written back afterwards.
    #
    # readDest is accepted for signature parity with the other helpers
    # but has no effect: the destination is always loaded, and the code
    # it used to guard only emitted whitespace.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1768
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    # Generate a NEON floating-point instruction that processes one
    # register at a time: for each of the rCount registers, op sees
    # srcReg1 (a FloatReg) and is expected to assign destReg.
    #
    #   toInt    - destination is held as raw bits (RegVect /
    #              FloatRegBits) instead of FloatReg values.
    #   readDest - load the old destination registers and preload destReg
    #              from them before op runs.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # Read the raw bits through the same _uw view that the
                # write-back below uses.
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    destType = 'FloatReg'
    destSlot = 'destRegs[r]'
    if toInt:
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs[r]'
    writeDest = '%s = destReg;' % destSlot
    readDestCode = ''
    if readDest:
        # The generated loop variable is r, and the slot syntax depends
        # on whether destRegs is a RegVect or a FloatVect.
        readDestCode = 'destReg = %s;' % destSlot
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1834
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a NEON instruction that combines adjacent source elements:
    # iteration i hands op the pair srcElem1 = elements[2 * i] and
    # srcElem2 = elements[2 * i + 1], and op is expected to assign the
    # BigElement destElem, which is stored at destination index i.
    # With readDest set, the old destination registers are loaded and
    # destElem is preloaded from them.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
        RegVect srcRegs;
        BigRegVect destReg;
    ''']
    for idx in range(rCount):
        pieces.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            pieces.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preload = ''
    pieces.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    for idx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    fields = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", fields, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1878
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a NEON narrowing instruction: a four-register source vector
    # of BigElements is reduced element by element into a two-register
    # destination vector of Elements.  op sees srcElem1 and is expected
    # to assign destElem.  With readDest set, the old destination
    # registers are loaded and destElem is preloaded from them.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
        BigRegVect srcReg1;
        RegVect destReg;
    ''']
    for idx in range(4):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    preload = ''
    if readDest:
        for idx in range(2):
            pieces.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
        preload = 'destElem = gtoh(destReg.elements[i]);'
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    for idx in range(2):
        pieces.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    fields = { "code": ''.join(pieces),
               "r_count": 2,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", fields, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1922
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate a NEON instruction that has no source vector: op runs once
    # per destination element and is expected to assign destElem.  The
    # destination spans rCount registers.  With readDest set, the old
    # destination registers are loaded and destElem is preloaded from
    # them before op runs.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
        RegVect destReg;
    ''']
    preload = ''
    if readDest:
        for idx in range(rCount):
            pieces.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
        preload = 'destElem = gtoh(destReg.elements[i]);'
    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : preload })
    for idx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })
    fields = { "code": ''.join(pieces),
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegImmOp", fields, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1960
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    # Generate a NEON widening instruction: a two-register source vector
    # (RegVect) is expanded element by element into a four-register
    # destination vector (BigRegVect).
    #
    #   name, Name - mnemonic and class name handed to InstObjParams.
    #   opClass    - value stored as "op_class" in the instruction params.
    #   types      - template arguments; one NeonExecDeclare is emitted
    #                for each of them.
    #   op         - C++ snippet run per element; it sees srcElem1 and is
    #                expected to assign destElem.
    #   readDest   - when true, the old destination registers are loaded
    #                and destElem is preloaded before op runs.
    #
    # Appends to the global header_output and exec_output strings.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
        # The previous value must land in destElem (the per-element
        # temporary), not in destReg, which is the whole vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2004
# Halving add: each operand is halved with its low bit masked off, and
# the carry out of the two low bits is added back.
vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + sfx, "SimdAddOp", allTypes,
                      nRegs, vhaddCode)

# Rounding halving add: same as above with an extra 1 folded into the
# carry computation.
vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + sfx, "SimdAddOp", allTypes,
                      nRegs, vrhaddCode)

# Halving subtract: the borrow between the two low bits is subtracted
# from the difference of the halved operands.
vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + sfx, "SimdAddOp", allTypes,
                      nRegs, vhsubCode)
2042
# Bitwise logic instructions.  Each snippet is registered twice, for the
# two-register (D) and the four-register (Q) form.
vandCode = '''
        destElem = srcElem1 & srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vandCode)

vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vbicCode)

vorrCode = '''
        destElem = srcElem1 | srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vorrCode)

# vmov reuses the OR snippet under its own mnemonic and op class.
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + sfx, "SimdMiscOp", unsignedTypes,
                      nRegs, vorrCode)

vornCode = '''
        destElem = srcElem1 | ~srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vornCode)

veorCode = '''
        destElem = srcElem1 ^ srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, veorCode)

# The three bit-select variants use the previous destination value, so
# they pass True as the seventh argument.
vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vbifCode, True)
vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vbitCode, True)
vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vbslCode, True)
2091
# Element-wise maximum and minimum.
vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + sfx, "SimdCmpOp", allTypes,
                      nRegs, vmaxCode)

vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + sfx, "SimdCmpOp", allTypes,
                      nRegs, vminCode)
2103
# Plain add, plus its pairwise, long, wide and narrowing ("high half")
# relatives.
vaddCode = '''
        destElem = srcElem1 + srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vadd", "NVadd" + sfx, "SimdAddOp", unsignedTypes,
                      nRegs, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)
# One snippet serves both the long and the wide form.
vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Narrowing add: keep the upper half of the double-width sum.
vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# Rounding variant: add half of the discarded range before shifting.
vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

# Plain subtract and its long and wide relatives.
vsubCode = '''
        destElem = srcElem1 - srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vsub", "NVsub" + sfx, "SimdAddOp", unsignedTypes,
                      nRegs, vsubCode)
vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2139
# Unsigned saturating add: a wrapped sum is smaller than an operand, in
# which case the result is clamped to all ones and the QC flag is set.
vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddU" + sfx, "SimdAddOp", unsignedTypes,
                      nRegs, vqaddUCode)
# Narrowing subtract: keep the upper half of the double-width difference.
vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# Rounding variant: add half of the discarded range before shifting.
vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2162
# Signed saturating add: overflow is detected when both operands share a
# sign that the sum does not have; the result is then clamped and the QC
# flag is set.
vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddS" + sfx, "SimdAddOp", signedTypes,
                      nRegs, vqaddSCode)

# Unsigned saturating subtract: a wrapped difference is larger than the
# minuend, in which case the result is clamped to zero.
vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubU" + sfx, "SimdAddOp", unsignedTypes,
                      nRegs, vqsubUCode)

# Signed saturating subtract: same sign test as the add, with the sense
# of the second operand inverted.
vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubS" + sfx, "SimdAddOp", signedTypes,
                      nRegs, vqsubSCode)
2208
# Comparisons: each element becomes all ones when the test holds and
# zero otherwise.
vcgtCode = '''
        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcgt", "Vcgt" + sfx, "SimdCmpOp", allTypes,
                      nRegs, vcgtCode)

vcgeCode = '''
        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcge", "Vcge" + sfx, "SimdCmpOp", allTypes,
                      nRegs, vcgeCode)

vceqCode = '''
        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vceq", "Vceq" + sfx, "SimdCmpOp", unsignedTypes,
                      nRegs, vceqCode)
2226
# Shift by a per-element signed amount taken from the low byte of the
# second operand: negative amounts shift right, others shift left.
# Amounts of a full element width or more are handled explicitly.
vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vshl", "Vshl" + sfx, "SimdShiftOp", allTypes,
                      nRegs, vshlCode)

# Rounding variant: on a right shift, the last bit shifted out (rBit) is
# added to the result.
vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrshl", "Vrshl" + sfx, "SimdAluOp", allTypes,
                      nRegs, vrshlCode)
2286
# Unsigned saturating shift: a left shift that would drop set bits is
# clamped to all ones and sets the QC flag.
vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlU" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vqshlUCode)

# Signed saturating shift: a left shift saturates when the bits shifted
# out (together with the new sign bit) are not all copies of the sign;
# the clamp value depends on the sign of the source.
vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
# NOTE(review): the signed form is registered with "SimdCmpOp" while the
# unsigned form above uses "SimdAluOp"; kept as found - confirm whether
# the difference is intended.
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlS" + sfx, "SimdCmpOp", signedTypes,
                      nRegs, vqshlSCode)
2368
# Unsigned saturating rounding shift.  Right shifts add the last bit
# shifted out; left shifts that would drop set bits clamp to all ones
# and set the QC flag.
#
# A shift amount of zero is handled by its own branch, as in vqshlUCode
# and vqrshlSCode.  Sending it through the left-shift path would evaluate
# bits(srcElem1, N - 1, N), i.e. a bit range starting one past the top
# bit of an N-bit element.
vqrshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)

# Signed saturating rounding shift.  Right shifts sign extend and add
# the last bit shifted out; left shifts saturate when the bits shifted
# out are not all copies of the sign bit.
vqrshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2458
# Absolute difference, with and without accumulation into the previous
# destination value, in equal-width and long forms.
vabaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vaba", "Vaba" + sfx, "SimdAddAccOp", allTypes,
                      nRegs, vabaCode, True)
vabalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

vabdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vabd", "Vabd" + sfx, "SimdAddOp", allTypes,
                      nRegs, vabdCode)
vabdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

# Bit test: all ones when the operands share any set bit, else zero.
vtstCode = '''
        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vtst", "Vtst" + sfx, "SimdAluOp", unsignedTypes,
                      nRegs, vtstCode)
2490
# Integer multiply and multiply-accumulate, in equal-width and long
# (double-width result) forms.
vmulCode = '''
        destElem = srcElem1 * srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmul", "NVmul" + sfx, "SimdMultOp", allTypes,
                      nRegs, vmulCode)
vmullCode = '''
        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)

# The accumulating forms read the previous destination value.
vmlaCode = '''
        destElem = destElem + srcElem1 * srcElem2;
'''
for sfx, nRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmla", "NVmla" + sfx, "SimdMultAccOp", allTypes,
                      nRegs, vmlaCode, True)
vmlalCode = '''
        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2510
# Saturating doubling multiply-accumulate (long).
#
# The doubled product 2 * srcElem1 * srcElem2 of two N-bit elements only
# leaves the 2N-bit signed range when both inputs are the most negative
# value (2 * 2^(N-1) * 2^(N-1) = 2^(2N-1)).  The earlier code also forced
# saturation, and set QC, when one input was half of that value, although
# that product is 2^(2N-2) and fits; vqdmullCode below already uses the
# single both-inputs test.  The accumulate step then saturates on its
# own when the addition overflows.
vqdmlalCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem += midElem;
        bool negDest = ltz(destElem);
        bool negMid = ltz(midElem);
        if (negPreDest == negMid && negMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)

# Saturating doubling multiply-subtract (long).  Same product saturation
# rule as above; the subtraction overflows when the old destination and
# the negated product share a sign that the result does not have.
vqdmlslCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        bool negPreDest = ltz(destElem);
        destElem -= midElem;
        bool negDest = ltz(destElem);
        bool posMid = ltz((BigElement)-midElem);
        if (negPreDest == posMid && posMid != negDest) {
            destElem = mask(sizeof(BigElement) * 8 - 1);
            if (negPreDest)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)

# Saturating doubling multiply (long): saturates only when both inputs
# are the most negative value.
vqdmullCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)((Element)1 <<
                    (Element)(sizeof(Element) * 8 - 1))) {
            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2573
2574 vmlsCode = '''
2575 destElem = destElem - srcElem1 * srcElem2;
2576 '''
2577 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2578 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2579 vmlslCode = '''
2580 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2581 '''
2582 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2583
# Polynomial "vmul": for every set bit of srcElem2, XOR a correspondingly
# shifted copy of srcElem1 into the result.
vmulpCode = '''
    const unsigned elemBits = sizeof(Element) * 8;
    destElem = 0;
    for (unsigned bitIdx = 0; bitIdx < elemBits; bitIdx++) {
        if (!bits(srcElem2, bitIdx))
            continue;
        destElem ^= srcElem1 << bitIdx;
    }
'''
for _cls, _regs in (("NVmulpD", 2), ("NVmulpQ", 4)):
    threeEqualRegInst("vmul", _cls, "SimdMultOp", unsignedTypes, _regs,
                      vmulpCode)

# Polynomial "vmull": identical loop, with srcElem1 widened to BigElement
# before it is shifted.
vmullpCode = '''
    const unsigned elemBits = sizeof(Element) * 8;
    destElem = 0;
    for (unsigned bitIdx = 0; bitIdx < elemBits; bitIdx++) {
        if (!bits(srcElem2, bitIdx))
            continue;
        destElem ^= (BigElement)srcElem1 << bitIdx;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes,
                 vmullpCode)
2601
# Pairwise integer max/min; both reuse the vmaxCode / vminCode snippets that
# are defined earlier in the file.
for _mnem, _cls, _code in (("vpmax", "VpmaxD", vmaxCode),
                           ("vpmin", "VpminD", vminCode)):
    threeEqualRegInst(_mnem, _cls, "SimdCmpOp", smallTypes, 2, _code,
                      pairwise=True)
2605
# "vqdmulh": keeps the bits of the doubled product above one Element width.
# When both sources equal the most negative Element value the result becomes
# the complement of that value and QC is raised through FpscrQc.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    // Element value with only the top bit set.
    const Element signOnly = (Element)((Element)1 <<
                                       (sizeof(Element) * 8 - 1));
    const bool bothSignOnly = (srcElem1 == srcElem2) &&
                              (srcElem1 == signOnly);
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (bothSignOnly) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for _cls, _regs in (("VqdmulhD", 2), ("VqdmulhQ", 4)):
    threeEqualRegInst("vqdmulh", _cls, "SimdMultOp", smallSignedTypes,
                      _regs, vqdmulhCode)
2620
# "vqrdmulh": rounded high half of the doubled product of two signed elements.
#
# The value (2 * a * b + 2^(bits-1)) >> bits only leaves the Element range
# when both a and b are the most negative Element value; every other pair
# (including -2^(bits-2) * -2^(bits-1), whose result is 2^(bits-2)) fits.
# Saturation is therefore decided from the operands alone, which also avoids
# evaluating 2 * a * b in int64_t for the one 32-bit input pair that would
# overflow it.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    // Most negative Element value (only the sign bit set).
    const Element maxNeg = (Element)((Element)1 <<
                                     (sizeof(Element) * 8 - 1));
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Positive overflow: clamp to the largest positive Element.
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
                  "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
                  "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2644
# Floating-point max/min.  processNans() produces the result and reports
# through its second argument whether it finished the operation; if it did
# not, binaryOp() runs the actual comparison function, otherwise the
# input-denormal flag (idc) is set when flushToZero() reports true for the
# sources.  The two snippets differ only in the function given to binaryOp().
_fpMinMaxBody = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool nanHandled;
    destReg = processNans(fpscr, nanHandled, true, srcReg1, srcReg2);
    if (nanHandled) {
        if (flushToZero(srcReg1, srcReg2))
            fpscr.idc = 1;
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, %s,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''

vmaxfpCode = _fpMinMaxBody % "fpMax<float>"
for _cls, _regs in (("VmaxDFp", 2), ("VmaxQFp", 4)):
    threeEqualRegInstFp("vmax", _cls, "SimdFloatCmpOp", ("float",),
                        _regs, vmaxfpCode)

vminfpCode = _fpMinMaxBody % "fpMin<float>"
for _cls, _regs in (("VminDFp", 2), ("VminQFp", 4)):
    threeEqualRegInstFp("vmin", _cls, "SimdFloatCmpOp", ("float",),
                        _regs, vminfpCode)

# Pairwise forms reuse the same snippets.
for _cls, _regs in (("VpmaxDFp", 2), ("VpmaxQFp", 4)):
    threeEqualRegInstFp("vpmax", _cls, "SimdFloatCmpOp", ("float",),
                        _regs, vmaxfpCode, pairwise=True)

for _cls, _regs in (("VpminDFp", 2), ("VpminQFp", 4)):
    threeEqualRegInstFp("vpmin", _cls, "SimdFloatCmpOp", ("float",),
                        _regs, vminfpCode, pairwise=True)
2684
# Floating-point add / pairwise add / subtract / multiply.  Each snippet
# loads the FPSCR from FpscrExc, runs one binaryOp() with the given function
# and writes the FPSCR back.
def _neonFpBinaryCode(opFunc):
    return '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, %s,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
''' % opFunc

vaddfpCode = _neonFpBinaryCode("fpAddS")
for _cls, _regs in (("VaddDFp", 2), ("VaddQFp", 4)):
    threeEqualRegInstFp("vadd", _cls, "SimdFloatAddOp", ("float",),
                        _regs, vaddfpCode)

for _cls, _regs in (("VpaddDFp", 2), ("VpaddQFp", 4)):
    threeEqualRegInstFp("vpadd", _cls, "SimdFloatAddOp", ("float",),
                        _regs, vaddfpCode, pairwise=True)

vsubfpCode = _neonFpBinaryCode("fpSubS")
for _cls, _regs in (("VsubDFp", 2), ("VsubQFp", 4)):
    threeEqualRegInstFp("vsub", _cls, "SimdFloatAddOp", ("float",),
                        _regs, vsubfpCode)

vmulfpCode = _neonFpBinaryCode("fpMulS")
for _cls, _regs in (("NVmulDFp", 2), ("NVmulQFp", 4)):
    threeEqualRegInstFp("vmul", _cls, "SimdFloatMultOp", ("float",),
                        _regs, vmulfpCode)
2716
# "vmla" (float): multiply through binaryOp(), then add the product to the
# destination with a second binaryOp().
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, product, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for _cls, _regs in (("NVmlaDFp", 2), ("NVmlaQFp", 4)):
    threeEqualRegInstFp("vmla", _cls, "SimdFloatMultAccOp", ("float",),
                        _regs, vmlafpCode, True)

# "vfma" / "vfms": a single ternaryOp() with fpMulAdd<float>; vfms passes the
# negated first source.
def _neonFusedMulAddCode(firstOperand):
    return '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = ternaryOp(fpscr, %s, srcReg2, destReg, fpMulAdd<float>,
                        true, true, VfpRoundNearest);
    FpscrExc = fpscr;
''' % firstOperand

vfmafpCode = _neonFusedMulAddCode("srcReg1")
for _cls, _regs in (("NVfmaDFp", 2), ("NVfmaQFp", 4)):
    threeEqualRegInstFp("vfma", _cls, "SimdFloatMultAccOp", ("float",),
                        _regs, vfmafpCode, True)

vfmsfpCode = _neonFusedMulAddCode("-srcReg1")
for _cls, _regs in (("NVfmsDFp", 2), ("NVfmsQFp", 4)):
    threeEqualRegInstFp("vfms", _cls, "SimdFloatMultAccOp", ("float",),
                        _regs, vfmsfpCode, True)

# "vmls" (float): multiply, then subtract the product from the destination.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, product, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for _cls, _regs in (("NVmlsDFp", 2), ("NVmlsQFp", 4)):
    threeEqualRegInstFp("vmls", _cls, "SimdFloatMultAccOp", ("float",),
                        _regs, vmlsfpCode, True)
2756
# Register-register floating-point compares.  Every snippet runs the given
# comparison function through binaryOp(); a result of 0 yields -1 in destReg
# (anything else yields 0) and a result of 2.0 sets the invalid-operation
# flag (ioc).
def _neonFpCompareCode(cmpFunc):
    return '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float outcome = binaryOp(fpscr, srcReg1, srcReg2, %s,
                             true, true, VfpRoundNearest);
    destReg = (outcome == 0) ? -1 : 0;
    if (outcome == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
''' % cmpFunc

vcgtfpCode = _neonFpCompareCode("vcgtFunc")
vcgefpCode = _neonFpCompareCode("vcgeFunc")
vacgtfpCode = _neonFpCompareCode("vacgtFunc")
vacgefpCode = _neonFpCompareCode("vacgeFunc")
vceqfpCode = _neonFpCompareCode("vceqFunc")

for _mnem, _stem, _code in (("vcgt", "Vcgt", vcgtfpCode),
                            ("vcge", "Vcge", vcgefpCode),
                            ("vacgt", "Vacgt", vacgtfpCode),
                            ("vacge", "Vacge", vacgefpCode),
                            ("vceq", "Vceq", vceqfpCode)):
    threeEqualRegInstFp(_mnem, _stem + "DFp", "SimdFloatCmpOp", ("float",),
                        2, _code, toInt = True)
    threeEqualRegInstFp(_mnem, _stem + "QFp", "SimdFloatCmpOp", ("float",),
                        4, _code, toInt = True)
2826
# "vrecps": one binaryOp() with fpRecpsS.
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpRecpsS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for _cls, _regs in (("VrecpsDFp", 2), ("VrecpsQFp", 4)):
    threeEqualRegInstFp("vrecps", _cls, "SimdFloatMultAccOp", ("float",),
                        _regs, vrecpsCode)

# "vrsqrts": one binaryOp() with fpRSqrtsS.
vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2,
                       fpRSqrtsS, true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for _cls, _regs in (("VrsqrtsDFp", 2), ("VrsqrtsQFp", 4)):
    threeEqualRegInstFp("vrsqrts", _cls, "SimdFloatMiscOp", ("float",),
                        _regs, vrsqrtsCode)

# "vabd" (float): absolute value of the binaryOp() subtraction result.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float difference = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                                true, true, VfpRoundNearest);
    destReg = fabs(difference);
    FpscrExc = fpscr;
'''
for _cls, _regs in (("VabdDFp", 2), ("VabdQFp", 4)):
    threeEqualRegInstFp("vabd", _cls, "SimdFloatAddOp", ("float",),
                        _regs, vabdfpCode)
2854
# Two-register ("...s"-suffixed) instantiations that reuse the multiply
# snippets defined earlier in the file.
# NOTE(review): presumably these are the by-scalar encodings -- confirm
# against the decoder.

# vmla / vmlal
for _cls, _regs in (("VmlasD", 2), ("VmlasQ", 4)):
    twoEqualRegInst("vmla", _cls, "SimdMultAccOp", unsignedTypes, _regs,
                    vmlaCode, True)
for _cls, _regs in (("VmlasDFp", 2), ("VmlasQFp", 4)):
    twoEqualRegInstFp("vmla", _cls, "SimdFloatMultAccOp", ("float",), _regs,
                      vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes,
               vmlalCode, True)

# vmls / vmlsl
for _cls, _regs in (("VmlssD", 2), ("VmlssQ", 4)):
    twoEqualRegInst("vmls", _cls, "SimdMultAccOp", allTypes, _regs,
                    vmlsCode, True)
for _cls, _regs in (("VmlssDFp", 2), ("VmlssQFp", 4)):
    twoEqualRegInstFp("vmls", _cls, "SimdFloatMultAccOp", ("float",), _regs,
                      vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes,
               vmlslCode, True)

# vmul / vmull
for _cls, _regs in (("VmulsD", 2), ("VmulsQ", 4)):
    twoEqualRegInst("vmul", _cls, "SimdMultOp", allTypes, _regs, vmulCode)
for _cls, _regs in (("VmulsDFp", 2), ("VmulsQFp", 4)):
    twoEqualRegInstFp("vmul", _cls, "SimdFloatMultOp", ("float",), _regs,
                      vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)

# Saturating doubling multiplies
twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes,
               vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes,
               vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes,
               vqdmlslCode, True)
for _cls, _regs in (("VqdmulhsD", 2), ("VqdmulhsQ", 4)):
    twoEqualRegInst("vqdmulh", _cls, "SimdMultOp", smallSignedTypes, _regs,
                    vqdmulhCode)
for _cls, _regs in (("VqrdmulhsD", 2), ("VqrdmulhsQ", 4)):
    twoEqualRegInst("vqrdmulh", _cls, "SimdMultOp", smallSignedTypes, _regs,
                    vqrdmulhCode)
2882
# "vshr": shift right by immediate.  An immediate of at least the source
# width yields -1 when ltz() reports the source negative and 0 otherwise.
vshrCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = ltz(srcElem1) ? -1 : 0;
    }
'''
for _cls, _regs in (("NVshrD", 2), ("NVshrQ", 4)):
    twoRegShiftInst("vshr", _cls, "SimdShiftOp", allTypes, _regs, vshrCode)
2895
# "vsra": shift right by immediate and accumulate into the destination.
# The shifted value is built in `mid` first; the only change from the previous
# revision is the removal of a stray empty statement after its declaration.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        // Every bit is shifted out; only the sign of the source remains.
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        // If the shift of a negative source did not produce a negative
        // value, extend the shifted sign bit by hand.
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2911
# "vrshr": rounding shift right by immediate.  The bit just below the
# retained part is added back in; a zero immediate copies the source and an
# immediate larger than the source width yields zero.
vrshrCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    } else {
        destElem = 0;
    }
'''
for _cls, _regs in (("NVrshrD", 2), ("NVrshrQ", 4)):
    twoRegShiftInst("vrshr", _cls, "SimdShiftOp", allTypes, _regs, vrshrCode)

# "vrsra": same rounding shift, accumulated into the destination.  An
# immediate larger than the source width contributes nothing.
vrsraCode = '''
    if (imm == 0) {
        destElem += srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
for _cls, _regs in (("NVrsraD", 2), ("NVrsraQ", 4)):
    twoRegShiftInst("vrsra", _cls, "SimdShiftAccOp", allTypes, _regs,
                    vrsraCode, True)
2937
# "vsri": shift right and insert.  The top `imm` bits of the destination are
# kept; an immediate of at least the element width leaves it untouched.
vsriCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 >> imm) |
                   (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
for _cls, _regs in (("NVsriD", 2), ("NVsriQ", 4)):
    twoRegShiftInst("vsri", _cls, "SimdShiftOp", unsignedTypes, _regs,
                    vsriCode, True)

# "vshl": shift left by immediate.  An immediate of at least the element
# width is performed as two shifts (width - 1, then 1).
vshlCode = '''
    if (imm < sizeof(Element) * 8)
        destElem = srcElem1 << imm;
    else
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
'''
for _cls, _regs in (("NVshlD", 2), ("NVshlQ", 4)):
    twoRegShiftInst("vshl", _cls, "SimdShiftOp", unsignedTypes, _regs,
                    vshlCode)

# "vsli": shift left and insert.  The low `imm` bits of the destination are
# kept; an immediate of at least the element width leaves it untouched.
vsliCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
for _cls, _regs in (("NVsliD", 2), ("NVsliQ", 4)):
    twoRegShiftInst("vsli", _cls, "SimdShiftOp", unsignedTypes, _regs,
                    vsliCode, True)
2965
# C++ snippet for the saturating shift left by immediate ("vqshl"),
# instantiated below for signedTypes.
#  - imm >= element width: any non-zero source saturates; zero stays zero.
#  - 0 < imm < element width: the top (imm + 1) bits of the source are
#    inspected; unless they are all zero or all one, the result saturates.
#  - imm == 0: the source is copied unchanged.
# The saturated value is the element with only its top bit set, inverted when
# the source is positive.  Saturation sets fpscr.qc, which is written back
# through FpscrQc.
2966 vqshlCode = '''
2967 FPSCR fpscr = (FPSCR) FpscrQc;
2968 if (imm >= sizeof(Element) * 8) {
2969 if (srcElem1 != 0) {
2970 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2971 if (srcElem1 > 0)
2972 destElem = ~destElem;
2973 fpscr.qc = 1;
2974 } else {
2975 destElem = 0;
2976 }
2977 } else if (imm) {
2978 destElem = (srcElem1 << imm);
2979 uint64_t topBits = bits((uint64_t)srcElem1,
2980 sizeof(Element) * 8 - 1,
2981 sizeof(Element) * 8 - 1 - imm);
2982 if (topBits != 0 && topBits != mask(imm + 1)) {
2983 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2984 if (srcElem1 > 0)
2985 destElem = ~destElem;
2986 fpscr.qc = 1;
2987 }
2988 } else {
2989 destElem = srcElem1;
2990 }
2991 FpscrQc = fpscr;
2992 '''
# Generate the NVqshlD and NVqshlQ classes from the snippet above.
2993 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2994 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2995
# C++ snippet for the saturating shift left by immediate, instantiated below
# for unsignedTypes under the mnemonic "vqshlu".
#  - imm >= element width: any non-zero source saturates; zero stays zero.
#  - 0 < imm < element width: saturates when any of the top `imm` source bits
#    is set.
#  - imm == 0: the source is copied unchanged.
# The saturated value is mask(element width), i.e. all element bits set.
# Saturation sets fpscr.qc, which is written back through FpscrQc.
2996 vqshluCode = '''
2997 FPSCR fpscr = (FPSCR) FpscrQc;
2998 if (imm >= sizeof(Element) * 8) {
2999 if (srcElem1 != 0) {
3000 destElem = mask(sizeof(Element) * 8);
3001 fpscr.qc = 1;
3002 } else {
3003 destElem = 0;
3004 }
3005 } else if (imm) {
3006 destElem = (srcElem1 << imm);
3007 uint64_t topBits = bits((uint64_t)srcElem1,
3008 sizeof(Element) * 8 - 1,
3009 sizeof(Element) * 8 - imm);
3010 if (topBits != 0) {
3011 destElem = mask(sizeof(Element) * 8);
3012 fpscr.qc = 1;
3013 }
3014 } else {
3015 destElem = srcElem1;
3016 }
3017 FpscrQc = fpscr;
3018 '''
# Generate the NVqshluD and NVqshluQ classes from the snippet above.
3019 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3020 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3021
# C++ snippet for the saturating shift left by immediate, instantiated below
# for signedTypes under the mnemonic "vqshlus".
# In every branch a negative source produces 0 and sets fpscr.qc.  For
# non-negative sources:
#  - imm >= element width: a positive source saturates to mask(element
#    width); zero stays zero.
#  - 0 < imm < element width: saturates to mask(element width) when any of
#    the top `imm` source bits is set.
#  - imm == 0: the source is copied unchanged.
# fpscr is written back through FpscrQc.
3022 vqshlusCode = '''
3023 FPSCR fpscr = (FPSCR) FpscrQc;
3024 if (imm >= sizeof(Element) * 8) {
3025 if (srcElem1 < 0) {
3026 destElem = 0;
3027 fpscr.qc = 1;
3028 } else if (srcElem1 > 0) {
3029 destElem = mask(sizeof(Element) * 8);
3030 fpscr.qc = 1;
3031 } else {
3032 destElem = 0;
3033 }
3034 } else if (imm) {
3035 destElem = (srcElem1 << imm);
3036 uint64_t topBits = bits((uint64_t)srcElem1,
3037 sizeof(Element) * 8 - 1,
3038 sizeof(Element) * 8 - imm);
3039 if (srcElem1 < 0) {
3040 destElem = 0;
3041 fpscr.qc = 1;
3042 } else if (topBits != 0) {
3043 destElem = mask(sizeof(Element) * 8);
3044 fpscr.qc = 1;
3045 }
3046 } else {
3047 if (srcElem1 < 0) {
3048 fpscr.qc = 1;
3049 destElem = 0;
3050 } else {
3051 destElem = srcElem1;
3052 }
3053 }
3054 FpscrQc = fpscr;
3055 '''
# Generate the NVqshlusD and NVqshlusQ classes from the snippet above.
3056 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3057 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3058
# "vshrn": shift right by immediate and narrow.  An immediate of at least the
# source width yields zero.
vshrnCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, vshrnCode)

# "vrshrn": rounding variant.  The bit just below the retained part is added
# back in; a zero immediate copies the source and an immediate larger than
# the source width yields zero.
vrshrnCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, vrshrnCode)
3079
# C++ snippet for the saturating shift right and narrow ("vqshrn"),
# instantiated below for smallSignedTypes.
#  - imm > source width: result is 0; fpscr.qc is set unless the source is 0
#    or -1.
#  - imm != 0: the source is shifted right in two steps ((imm - 1), then 1),
#    the shifted value is sign-extended by hand, and if it does not survive a
#    round trip through Element the result saturates to mask(element width
#    - 1), inverted for negative sources, with fpscr.qc set.
#  - imm == 0: the source is copied unchanged.
# fpscr is written back through FpscrQc.
3080 vqshrnCode = '''
3081 FPSCR fpscr = (FPSCR) FpscrQc;
3082 if (imm > sizeof(srcElem1) * 8) {
3083 if (srcElem1 != 0 && srcElem1 != -1)
3084 fpscr.qc = 1;
3085 destElem = 0;
3086 } else if (imm) {
3087 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3088 mid |= -(mid & ((BigElement)1 <<
3089 (sizeof(BigElement) * 8 - 1 - imm)));
3090 if (mid != (Element)mid) {
3091 destElem = mask(sizeof(Element) * 8 - 1);
3092 if (srcElem1 < 0)
3093 destElem = ~destElem;
3094 fpscr.qc = 1;
3095 } else {
3096 destElem = mid;
3097 }
3098 } else {
3099 destElem = srcElem1;
3100 }
3101 FpscrQc = fpscr;
3102 '''
# Generate the NVqshrn class from the snippet above.
3103 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3104
# C++ snippet for the saturating shift right and narrow, instantiated below
# for smallUnsignedTypes under the mnemonic "vqshrun".
#  - imm > source width: result is 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: the source is shifted right in two steps ((imm - 1), then 1);
#    if the value does not survive a round trip through Element the result
#    saturates to mask(element width) with fpscr.qc set.
#  - imm == 0: the source is copied unchanged.
# fpscr is written back through FpscrQc.
3105 vqshrunCode = '''
3106 FPSCR fpscr = (FPSCR) FpscrQc;
3107 if (imm > sizeof(srcElem1) * 8) {
3108 if (srcElem1 != 0)
3109 fpscr.qc = 1;
3110 destElem = 0;
3111 } else if (imm) {
3112 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3113 if (mid != (Element)mid) {
3114 destElem = mask(sizeof(Element) * 8);
3115 fpscr.qc = 1;
3116 } else {
3117 destElem = mid;
3118 }
3119 } else {
3120 destElem = srcElem1;
3121 }
3122 FpscrQc = fpscr;
3123 '''
# Generate the NVqshrun class from the snippet above.
3124 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3125 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3126
# C++ snippet for the saturating shift right and narrow, instantiated below
# for smallSignedTypes under the mnemonic "vqshrun" (class NVqshruns).
#  - imm > source width: result is 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: the source is shifted right in two steps ((imm - 1), then 1);
#    if any bit of the shifted value above the Element width is set, the
#    result saturates to 0 for a negative source and to mask(element width)
#    otherwise, with fpscr.qc set.
#  - imm == 0: the source is copied unchanged.
# fpscr is written back through FpscrQc.
3127 vqshrunsCode = '''
3128 FPSCR fpscr = (FPSCR) FpscrQc;
3129 if (imm > sizeof(srcElem1) * 8) {
3130 if (srcElem1 != 0)
3131 fpscr.qc = 1;
3132 destElem = 0;
3133 } else if (imm) {
3134 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3135 if (bits(mid, sizeof(BigElement) * 8 - 1,
3136 sizeof(Element) * 8) != 0) {
3137 if (srcElem1 < 0) {
3138 destElem = 0;
3139 } else {
3140 destElem = mask(sizeof(Element) * 8);
3141 }
3142 fpscr.qc = 1;
3143 } else {
3144 destElem = mid;
3145 }
3146 } else {
3147 destElem = srcElem1;
3148 }
3149 FpscrQc = fpscr;
3150 '''
# Generate the NVqshruns class from the snippet above.
3151 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3152 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3153
# C++ snippet for the saturating rounding shift right and narrow
# ("vqrshrn"), instantiated below for smallSignedTypes.
#  - imm > source width: result is 0; fpscr.qc is set unless the source is 0
#    or -1.
#  - imm != 0: the source is shifted right by (imm - 1), its low bit is saved
#    as the rounding bit, the value is shifted once more, sign-extended by
#    hand and the rounding bit is added.  If the sum does not survive a round
#    trip through Element the result saturates to mask(element width - 1),
#    inverted for negative sources, with fpscr.qc set.
#  - imm == 0: the source itself gets the same round-trip check and
#    saturation.
# fpscr is written back through FpscrQc.
3154 vqrshrnCode = '''
3155 FPSCR fpscr = (FPSCR) FpscrQc;
3156 if (imm > sizeof(srcElem1) * 8) {
3157 if (srcElem1 != 0 && srcElem1 != -1)
3158 fpscr.qc = 1;
3159 destElem = 0;
3160 } else if (imm) {
3161 BigElement mid = (srcElem1 >> (imm - 1));
3162 uint64_t rBit = mid & 0x1;
3163 mid >>= 1;
3164 mid |= -(mid & ((BigElement)1 <<
3165 (sizeof(BigElement) * 8 - 1 - imm)));
3166 mid += rBit;
3167 if (mid != (Element)mid) {
3168 destElem = mask(sizeof(Element) * 8 - 1);
3169 if (srcElem1 < 0)
3170 destElem = ~destElem;
3171 fpscr.qc = 1;
3172 } else {
3173 destElem = mid;
3174 }
3175 } else {
3176 if (srcElem1 != (Element)srcElem1) {
3177 destElem = mask(sizeof(Element) * 8 - 1);
3178 if (srcElem1 < 0)
3179 destElem = ~destElem;
3180 fpscr.qc = 1;
3181 } else {
3182 destElem = srcElem1;
3183 }
3184 }
3185 FpscrQc = fpscr;
3186 '''
# Generate the NVqrshrn class from the snippet above.
3187 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3188 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3189
# Saturating rounding shift right and narrow, instantiated for
# smallUnsignedTypes under the mnemonic "vqrshrun".
#  - imm > source width: result is 0; QC is set for a non-zero source.
#  - imm != 0: shift right by (imm - 1), save the low bit as the rounding bit,
#    shift once more and add the rounding bit; a value that does not fit in
#    Element saturates.
#  - imm == 0: the source itself is range-checked and saturated.
# Both saturating branches now produce mask(element width), i.e. all element
# bits set.  The zero-shift branch previously used mask(element width - 1),
# which is the signed maximum and disagreed with the shifted branch.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation: every bit of the narrow element set.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3219
# C++ snippet for the saturating rounding shift right and narrow,
# instantiated below for smallSignedTypes under the mnemonic "vqrshrun"
# (class NVqrshruns).
#  - imm > source width: result is 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: the source is shifted right by (imm - 1), its low bit is saved
#    as the rounding bit, the value is shifted once more, sign-extended by
#    hand and the rounding bit is added.  If any bit above the Element width
#    is set, the result saturates to 0 for a negative source and to
#    mask(element width) otherwise, with fpscr.qc set.
#  - imm == 0: a negative source produces 0 with fpscr.qc set; otherwise the
#    source is copied.
# fpscr is written back through FpscrQc.
3220 vqrshrunsCode = '''
3221 FPSCR fpscr = (FPSCR) FpscrQc;
3222 if (imm > sizeof(srcElem1) * 8) {
3223 if (srcElem1 != 0)
3224 fpscr.qc = 1;
3225 destElem = 0;
3226 } else if (imm) {
3227 BigElement mid = (srcElem1 >> (imm - 1));
3228 uint64_t rBit = mid & 0x1;
3229 mid >>= 1;
3230 mid |= -(mid & ((BigElement)1 <<
3231 (sizeof(BigElement) * 8 - 1 - imm)));
3232 mid += rBit;
3233 if (bits(mid, sizeof(BigElement) * 8 - 1,
3234 sizeof(Element) * 8) != 0) {
3235 if (srcElem1 < 0) {
3236 destElem = 0;
3237 } else {
3238 destElem = mask(sizeof(Element) * 8);
3239 }
3240 fpscr.qc = 1;
3241 } else {
3242 destElem = mid;
3243 }
3244 } else {
3245 if (srcElem1 < 0) {
3246 fpscr.qc = 1;
3247 destElem = 0;
3248 } else {
3249 destElem = srcElem1;
3250 }
3251 }
3252 FpscrQc = fpscr;
3253 '''
# Generate the NVqrshruns class from the snippet above.
3254 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3255 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3256
# "vshll": widen the source to BigElement and shift left by the immediate.
# An immediate of at least the destination width yields zero.
vshllCode = '''
    if (imm < sizeof(destElem) * 8) {
        destElem = (BigElement)srcElem1 << imm;
    } else {
        destElem = 0;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp",
                    smallTypes, vshllCode)

# "vmovl": plain copy of the source element into the destination element.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp",
                    smallTypes, vmovlCode)
3270
# C++ snippet for "vcvt" from float to fixed point; vfpFpToFixed<float> is
# called with `false` as its second argument (the sibling vcvt2sfxCode passes
# `true`).  fpscr.idc is set when flushToZero() reports true for the source.
# The conversion sits between prepFpState() and finishVfp(), and is bracketed
# by empty asm statements with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3271 vcvt2ufxCode = '''
3272 FPSCR fpscr = (FPSCR) FpscrExc;
3273 if (flushToZero(srcElem1))
3274 fpscr.idc = 1;
3275 VfpSavedState state = prepFpState(VfpRoundNearest);
3276 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3277 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3278 __asm__ __volatile__("" :: "m" (destReg));
3279 finishVfp(fpscr, state, true);
3280 FpscrExc = fpscr;
3281 '''
# Generate the NVcvt2ufxD and NVcvt2ufxQ classes from the snippet above.
3282 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3283 2, vcvt2ufxCode, toInt = True)
3284 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3285 4, vcvt2ufxCode, toInt = True)
3286
# C++ snippet for "vcvt" from float to fixed point; vfpFpToFixed<float> is
# called with `true` as its second argument (the sibling vcvt2ufxCode passes
# `false`).  fpscr.idc is set when flushToZero() reports true for the source.
# The conversion sits between prepFpState() and finishVfp(), and is bracketed
# by empty asm statements with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3287 vcvt2sfxCode = '''
3288 FPSCR fpscr = (FPSCR) FpscrExc;
3289 if (flushToZero(srcElem1))
3290 fpscr.idc = 1;
3291 VfpSavedState state = prepFpState(VfpRoundNearest);
3292 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3293 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3294 __asm__ __volatile__("" :: "m" (destReg));
3295 finishVfp(fpscr, state, true);
3296 FpscrExc = fpscr;
3297 '''
# Generate the NVcvt2sfxD and NVcvt2sfxQ classes from the snippet above.
3298 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3299 2, vcvt2sfxCode, toInt = True)
3300 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3301 4, vcvt2sfxCode, toInt = True)
3302
# C++ snippet for "vcvt" from fixed point to float using vfpUFixedToFpS().
# The conversion sits between prepFpState() and finishVfp(), and is bracketed
# by empty asm statements with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3303 vcvtu2fpCode = '''
3304 FPSCR fpscr = (FPSCR) FpscrExc;
3305 VfpSavedState state = prepFpState(VfpRoundNearest);
3306 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3307 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3308 __asm__ __volatile__("" :: "m" (destElem));
3309 finishVfp(fpscr, state, true);
3310 FpscrExc = fpscr;
3311 '''
# Generate the NVcvtu2fpD and NVcvtu2fpQ classes from the snippet above.
3312 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3313 2, vcvtu2fpCode, fromInt = True)
3314 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3315 4, vcvtu2fpCode, fromInt = True)
3316
# C++ snippet for "vcvt" from fixed point to float using vfpSFixedToFpS().
# The conversion sits between prepFpState() and finishVfp(), and is bracketed
# by empty asm statements with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3317 vcvts2fpCode = '''
3318 FPSCR fpscr = (FPSCR) FpscrExc;
3319 VfpSavedState state = prepFpState(VfpRoundNearest);
3320 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3321 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3322 __asm__ __volatile__("" :: "m" (destElem));
3323 finishVfp(fpscr, state, true);
3324 FpscrExc = fpscr;
3325 '''
# Generate the NVcvts2fpD and NVcvts2fpQ classes from the snippet above.
3326 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3327 2, vcvts2fpCode, fromInt = True)
3328 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3329 4, vcvts2fpCode, fromInt = True)
3330
# C++ snippet for the narrowing "vcvt" that converts through vcvtFpSFpH().
# The source bits are first reinterpreted as a float with bitsToFp();
# fpscr.idc is set when flushToZero() reports true for that value.  The
# fpscr.ahp field is passed on to the conversion helper.  The conversion sits
# between prepFpState() and finishVfp(), and is bracketed by empty asm
# statements with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3331 vcvts2hCode = '''
3332 destElem = 0;
3333 FPSCR fpscr = (FPSCR) FpscrExc;
3334 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3335 if (flushToZero(srcFp1))
3336 fpscr.idc = 1;
3337 VfpSavedState state = prepFpState(VfpRoundNearest);
3338 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3339 : "m" (srcFp1), "m" (destElem));
3340 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3341 fpscr.ahp, srcFp1);
3342 __asm__ __volatile__("" :: "m" (destElem));
3343 finishVfp(fpscr, state, true);
3344 FpscrExc = fpscr;
3345 '''
# Generate the NVcvts2h class from the snippet above.
3346 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3347
# C++ snippet for the widening "vcvt" that converts through vcvtFpHFpS() and
# stores the result's bit pattern via fpToBits().  The fpscr.ahp field is
# passed on to the conversion helper.  The conversion sits between
# prepFpState() and finishVfp(), and is bracketed by empty asm statements
# with memory constraints on the operands.
# NOTE(review): the asm statements presumably keep the compiler from moving
# the conversion across the FP state changes -- confirm.
3348 vcvth2sCode = '''
3349 destElem = 0;
3350 FPSCR fpscr = (FPSCR) FpscrExc;
3351 VfpSavedState state = prepFpState(VfpRoundNearest);
3352 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3353 : "m" (srcElem1), "m" (destElem));
3354 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3355 __asm__ __volatile__("" :: "m" (destElem));
3356 finishVfp(fpscr, state, true);
3357 FpscrExc = fpscr;
3358 '''
# Generate the NVcvth2s class from the snippet above.
3359 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3360
# Estimate instructions.  The integer forms call the unsigned estimate
# helpers directly; the floating-point forms set fpscr.idc when flushToZero()
# reports true for the source and then call the fp estimate helper.
def _neonFpEstimateCode(estimateFunc):
    return '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = %s(fpscr, srcReg1);
    FpscrExc = fpscr;
''' % estimateFunc

vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
for _cls, _regs in (("NVrsqrteD", 2), ("NVrsqrteQ", 4)):
    twoRegMiscInst("vrsqrte", _cls, "SimdSqrtOp", ("uint32_t",), _regs,
                   vrsqrteCode)

vrsqrtefpCode = _neonFpEstimateCode("fprSqrtEstimate")
for _cls, _regs in (("NVrsqrteDFp", 2), ("NVrsqrteQFp", 4)):
    twoRegMiscInstFp("vrsqrte", _cls, "SimdFloatSqrtOp", ("float",), _regs,
                     vrsqrtefpCode)

vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
for _cls, _regs in (("NVrecpeD", 2), ("NVrecpeQ", 4)):
    twoRegMiscInst("vrecpe", _cls, "SimdMultAccOp", ("uint32_t",), _regs,
                   vrecpeCode)

vrecpefpCode = _neonFpEstimateCode("fpRecipEstimate")
for _cls, _regs in (("NVrecpeDFp", 2), ("NVrecpeQFp", 4)):
    twoRegMiscInstFp("vrecpe", _cls, "SimdFloatMultAccOp", ("float",), _regs,
                     vrecpefpCode)
3392
# "vrev16" / "vrev32" / "vrev64": copy the source element and redirect the
# destination index `j` to `i` XOR (elements-per-group - 1).  The group size
# in bytes is 1 << 1, 1 << 2 and 1 << 3 respectively.
def _neonReverseCode(groupBytesLog2):
    return '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << %d) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
''' % groupBytesLog2

vrev16Code = _neonReverseCode(1)
for _cls, _regs in (("NVrev16D", 2), ("NVrev16Q", 4)):
    twoRegMiscInst("vrev16", _cls, "SimdAluOp", ("uint8_t",), _regs,
                   vrev16Code)

vrev32Code = _neonReverseCode(2)
for _cls, _regs in (("NVrev32D", 2), ("NVrev32Q", 4)):
    twoRegMiscInst("vrev32", _cls, "SimdAluOp", ("uint8_t", "uint16_t"),
                   _regs, vrev32Code)

vrev64Code = _neonReverseCode(3)
for _cls, _regs in (("NVrev64D", 2), ("NVrev64Q", 4)):
    twoRegMiscInst("vrev64", _cls, "SimdAluOp", smallUnsignedTypes, _regs,
                   vrev64Code)
3419
# Calls split('exec') and then appends vcompares and vcomparesL to
# exec_output.
# NOTE(review): split(), vcompares and vcomparesL are defined outside this
# part of the file; their exact roles are not visible here -- confirm.
3420 split('exec')
3421 exec_output += vcompares + vcomparesL
3422
# "vpaddl": sum of the two source elements, each widened to BigElement.
vpaddlCode = '''
    const BigElement wideLhs = (BigElement)srcElem1;
    const BigElement wideRhs = (BigElement)srcElem2;
    destElem = wideLhs + wideRhs;
'''
for _cls, _regs in (("NVpaddlD", 2), ("NVpaddlQ", 4)):
    twoRegCondenseInst("vpaddl", _cls, "SimdAddOp", smallTypes, _regs,
                       vpaddlCode)

# "vpadal": the same widened sum, accumulated into the destination.
vpadalCode = '''
    const BigElement wideLhs = (BigElement)srcElem1;
    const BigElement wideRhs = (BigElement)srcElem2;
    destElem += wideLhs + wideRhs;
'''
for _cls, _regs in (("NVpadalD", 2), ("NVpadalQ", 4)):
    twoRegCondenseInst("vpadal", _cls, "SimdAddAccOp", smallTypes, _regs,
                       vpadalCode, True)
3428
# C++ snippet for "vcls", instantiated below for signedTypes.  The source is
# shifted left once, then `count` is incremented for every further position
# whose sign test still matches the original sign (negative sources loop
# while the value stays < 0, non-negative ones while it stays >= 0).  The
# count is capped at (element width - 1) and stored in destElem.
3429 vclsCode = '''
3430 unsigned count = 0;
3431 if (srcElem1 < 0) {
3432 srcElem1 <<= 1;
3433 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3434 count++;
3435 srcElem1 <<= 1;
3436 }
3437 } else {
3438 srcElem1 <<= 1;
3439 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3440 count++;
3441 srcElem1 <<= 1;
3442 }
3443 }
3444 destElem = count;
3445 '''
# Generate the NVclsD and NVclsQ classes from the snippet above.
3446 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3447 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3448
# C++ snippet for "vclz", instantiated below for signedTypes.  The source is
# shifted left one bit at a time while it compares >= 0, counting the
# iterations; the count is capped at the element width and stored in
# destElem.
3449 vclzCode = '''
3450 unsigned count = 0;
3451 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3452 count++;
3453 srcElem1 <<= 1;
3454 }
3455 destElem = count;
3456 '''
# Generate the NVclzD and NVclzQ classes from the snippet above.
3457 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3458 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3459
# "vcnt": adds up the low bit of the source while shifting it right, stopping
# once the source is zero or the running total reaches the element width.
vcntCode = '''
    unsigned setBits = 0;
    for (; srcElem1 && setBits < sizeof(Element) * 8; srcElem1 >>= 1)
        setBits += srcElem1 & 0x1;
    destElem = setBits;
'''
for _cls, _regs in (("NVcntD", 2), ("NVcntQ", 4)):
    twoRegMiscInst("vcnt", _cls, "SimdAluOp", unsignedTypes, _regs, vcntCode)
3471
# "vmvn": bitwise complement of the source element.
vmvnCode = '''
    destElem = ~srcElem1;
'''
for _cls, _regs in (("NVmvnD", 2), ("NVmvnQ", 4)):
    twoRegMiscInst("vmvn", _cls, "SimdAluOp", ("uint64_t",), _regs,
                   vmvnCode)
3477
# "vqabs": absolute value with saturation.  The one source value with only
# the top bit set is replaced by its complement and raises QC through
# FpscrQc.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element signOnly = (Element)((Element)1 <<
                                       (sizeof(Element) * 8 - 1));
    if (srcElem1 == signOnly) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = (srcElem1 < 0) ? -srcElem1 : srcElem1;
    }
    FpscrQc = fpscr;
'''
for _cls, _regs in (("NVqabsD", 2), ("NVqabsQ", 4)):
    twoRegMiscInst("vqabs", _cls, "SimdAluOp", signedTypes, _regs, vqabsCode)

# "vqneg": negation with the same saturation rule.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const Element signOnly = (Element)((Element)1 <<
                                       (sizeof(Element) * 8 - 1));
    if (srcElem1 == signOnly) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
for _cls, _regs in (("NVqnegD", 2), ("NVqnegQ", 4)):
    twoRegMiscInst("vqneg", _cls, "SimdAluOp", signedTypes, _regs, vqnegCode)
3505
# "vabs" (integer): negate negative sources, copy the rest.
vabsCode = '''
    destElem = (srcElem1 < 0) ? -srcElem1 : srcElem1;
'''
for _cls, _regs in (("NVabsD", 2), ("NVabsQ", 4)):
    twoRegMiscInst("vabs", _cls, "SimdAluOp", signedTypes, _regs, vabsCode)

# "vabs" (float): clear the top bit of the value's 32-bit representation
# through a union.
vabsfpCode = '''
    union
    {
        uint32_t raw;
        float value;
    } pun;
    pun.value = srcReg1;
    pun.raw &= mask(sizeof(Element) * 8 - 1);
    destReg = pun.value;
'''
for _cls, _regs in (("NVabsDFp", 2), ("NVabsQFp", 4)):
    twoRegMiscInstFp("vabs", _cls, "SimdFloatAluOp", ("float",), _regs,
                     vabsfpCode)

# "vneg" (integer)
vnegCode = '''
    destElem = -srcElem1;
'''
for _cls, _regs in (("NVnegD", 2), ("NVnegQ", 4)):
    twoRegMiscInst("vneg", _cls, "SimdAluOp", signedTypes, _regs, vnegCode)

# "vneg" (float)
vnegfpCode = '''
    destReg = -srcReg1;
'''
for _cls, _regs in (("NVnegDFp", 2), ("NVnegQFp", 4)):
    twoRegMiscInstFp("vneg", _cls, "SimdFloatAluOp", ("float",), _regs,
                     vnegfpCode)
3539
# Compare-against-zero family.  The integer forms write an all-ones mask of
# the element width when the comparison holds and zero otherwise.  The
# floating point forms run the matching v*Func helper through binaryOp
# against 0.0: a result of 0 yields -1, anything else yields 0, and a
# result of 2.0 additionally sets fpscr.ioc.

# VCGT #0
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp",
               signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp",
               signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcgtfpCode, toInt = True)

# VCGE #0
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp",
               signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp",
               signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcgefpCode, toInt = True)

# VCEQ #0
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp",
               signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp",
               signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp",
                 ("float",), 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp",
                 ("float",), 4, vceqfpCode, toInt = True)

# VCLE #0
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp",
               signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp",
               signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp",
                 ("float",), 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp",
                 ("float",), 4, vclefpCode, toInt = True)

# VCLT #0
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp",
               signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp",
               signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcltfpCode, toInt = True)
twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcltfpCode, toInt = True)
3624
# VSWP: exchange the two operands register by register via a temporary.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp",
                   ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp",
                   ("uint64_t",), 4, vswpCode)

# VTRN: for every element pair, swap the even element of the source with
# the odd element of the destination.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", smallUnsignedTypes,
                   2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", smallUnsignedTypes,
                   4, vtrnCode)
3648
# VUZP: de-interleave.  The even-indexed elements of destReg followed by
# those of srcReg1 end up in destReg; the odd-indexed ones end up in
# srcReg1.  A copy of the original srcReg1 is taken first because srcReg1
# is overwritten while it is still needed.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp",
                   unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp",
                   unsignedTypes, 4, vuzpCode)
3663
# VZIP: interleave.  destReg receives the interleaved lower halves of the
# original destReg and srcReg1; srcReg1 receives the interleaved upper
# halves.  The original destReg is copied into `mid` first because destReg
# is overwritten before its upper half is consumed.
#
# Both loops index with `unsigned`, matching the other element loops in
# these snippets; the second loop previously used `int`, which compared a
# signed index against eCount.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp",
                   unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp",
                   unsignedTypes, 4, vzipCode)
3678
# VMOVN: plain assignment; any narrowing is left to the element types
# chosen by twoRegNarrowMiscInst.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp",
                     smallUnsignedTypes, vmovnCode)

# VDUP (scalar form): copy the selected source element to the destination.
vdupCode = 'destElem = srcElem1;'
twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp",
                 smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp",
                 smallUnsignedTypes, 4, vdupCode)
3685
def vdupGprInst(name, Name, opClass, types, rCount):
    # Generate a VDUP variant that replicates Op1 into every element of
    # the destination vector and writes the vector back through rCount
    # FpDestP<n>_uw operands.  One execute declaration is emitted per
    # entry in `types`.
    global header_output, exec_output
    fillVector = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    writeBack = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = fillVector + "".join(
        [writeBack % { "reg" : idx } for idx in range(rCount)])
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp",
            smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp",
            smallUnsignedTypes, 4)
3712
# One-register immediate forms.  VORR and VBIC read and modify the existing
# destination value; they pass an extra trailing True that VMOV and VMVN
# do not.

# VMOV #imm: load the immediate.
vmovCode = 'destElem = imm;'
oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp",
              ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp",
              ("uint64_t",), 4, vmovCode)

# VORR #imm: OR the immediate into the destination.
vorrCode = 'destElem |= imm;'
oneRegImmInst("vorr", "NVorriD", "SimdAluOp",
              ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", "SimdAluOp",
              ("uint64_t",), 4, vorrCode, True)

# VMVN #imm: load the complemented immediate.
vmvnCode = 'destElem = ~imm;'
oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp",
              ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp",
              ("uint64_t",), 4, vmvnCode)

# VBIC #imm: clear the destination bits that are set in the immediate.
vbicCode = 'destElem &= ~imm;'
oneRegImmInst("vbic", "NVbiciD", "SimdAluOp",
              ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp",
              ("uint64_t",), 4, vbicCode, True)
3728
# VQMOVN (signed): narrow; if widening the result back does not reproduce
# the source, set fpscr.qc and substitute mask(width - 1), complemented
# when the source is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp",
                     smallSignedTypes, vqmovnCode)

# VQMOVUN (unsigned types): narrow; on a round-trip mismatch set fpscr.qc
# and substitute the all-ones mask of the element width.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun", "SimdMiscOp",
                     smallUnsignedTypes, vqmovunCode)

# VQMOVUN (signed types): as above, but a negative source also counts as
# out of range and produces the complemented mask instead.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns", "SimdMiscOp",
                     smallSignedTypes, vqmovunsCode)
3768
def buildVext(name, Name, opClass, types, rCount, op):
    # Generate a three-register-plus-immediate instruction: load both
    # source vectors from rCount FpOp1P<n>_uw / FpOp2P<n>_uw operands, run
    # the caller-supplied `op` snippet, then write destReg back through
    # FpDestP<n>_uw.  One execute declaration is emitted per entry in
    # `types`.
    global header_output, exec_output
    loadPair = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    storeOne = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        # Only the load template is %-formatted; simdEnabledCheckCode is
        # prepended verbatim for every register.
        eWalkCode += simdEnabledCheckCode + (loadPair % { "reg" : idx })
    eWalkCode += op
    eWalkCode += "".join(
        [storeOne % { "reg" : idx } for idx in range(rCount)])
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", params, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
3796
# VEXT: destination element i is taken from position i + imm of the
# sequence srcReg1 followed by srcReg2.  An index beyond both vectors
# raises an UndefinedInstruction fault.
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = new UndefinedInstruction(machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
buildVext("vext", "NVextD", "SimdMiscOp",
          ("uint8_t",), 2, vextCode)
buildVext("vext", "NVextQ", "SimdMiscOp",
          ("uint8_t",), 4, vextCode)
3814
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Generate a byte table lookup instruction.  `length` sets how much of
    # the 32-byte table is loaded from FpOp1P<n>_uw operands (2 * length
    # entries of table.regs; the rest is zeroed).  `isVtbl` is the C++
    # literal "true" or "false": when true an out-of-range index writes 0,
    # otherwise the destination byte is left as it was.
    global header_output, decoder_output, exec_output
    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    loadEntry = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
    zeroEntry = 'table.regs[%(reg)d] = 0;\n'
    code = prologue
    liveRegs = length * 2
    for idx in range(8):
        entry = loadEntry if idx < liveRegs else zeroEntry
        code += entry % { "reg" : idx }
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    params = { "code": code,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)

# VTBL: table lengths 1 through 4.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

# VTBX: table lengths 1 through 4.
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3878}};
# VPADDL: sum the two source elements after widening both to BigElement.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp",
                   smallTypes, 2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp",
                   smallTypes, 4, vpaddlCode)

# VPADAL: same widened sum, accumulated into the existing destination
# value; these calls pass an extra trailing True that VPADDL does not.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp",
                   smallTypes, 2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp",
                   smallTypes, 4, vpadalCode, True)
3434
# VCLS: drop the top bit with one shift, then count how many following
# bits keep the same sign as the original value, examining at most
# width - 1 bits.
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp",
               signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp",
               signedTypes, 4, vclsCode)
3454
# VCLZ: shift the element left until it goes negative (top bit set) or all
# of its bits have been consumed; the number of shifts is the result.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp",
               signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp",
               signedTypes, 4, vclzCode)

# VCNT: accumulate the low bit while shifting the element right, stopping
# early once no set bits remain.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp",
               unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp",
               unsignedTypes, 4, vcntCode)
3477
# VMVN (register form): bitwise complement of the source element.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp",
               ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp",
               ("uint64_t",), 4, vmvnCode)
3483
# VQABS: absolute value.  When the input has only its top bit set it cannot
# be negated, so the result becomes the complement of the input and
# fpscr.qc is set.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp",
               signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp",
               signedTypes, 4, vqabsCode)

# VQNEG: negation with the same special case as VQABS for the input whose
# only set bit is the top one.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp",
               signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp",
               signedTypes, 4, vqnegCode)
3511
# VABS (integer): negate negative elements, pass the rest through.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp",
               signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp",
               signedTypes, 4, vabsCode)

# VABS (float): view the value as a 32-bit integer through a union and
# keep only the bits below the top one.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp",
                 ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp",
                 ("float",), 4, vabsfpCode)

# VNEG (integer): arithmetic negation of each element.
vnegCode = '''
    destElem = -srcElem1;
'''
twoRegMiscInst("vneg", "NVnegD", "SimdAluOp",
               signedTypes, 2, vnegCode)
twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp",
               signedTypes, 4, vnegCode)

# VNEG (float): arithmetic negation of the source register value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp",
                 ("float",), 2, vnegfpCode)
twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp",
                 ("float",), 4, vnegfpCode)
3545
# Compare-against-zero family.  The integer forms write an all-ones mask of
# the element width when the comparison holds and zero otherwise.  The
# floating point forms run the matching v*Func helper through binaryOp
# against 0.0: a result of 0 yields -1, anything else yields 0, and a
# result of 2.0 additionally sets fpscr.ioc.

# VCGT #0
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp",
               signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp",
               signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcgtfpCode, toInt = True)

# VCGE #0
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp",
               signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp",
               signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcgefpCode, toInt = True)

# VCEQ #0
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp",
               signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp",
               signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp",
                 ("float",), 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp",
                 ("float",), 4, vceqfpCode, toInt = True)

# VCLE #0
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp",
               signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp",
               signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp",
                 ("float",), 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp",
                 ("float",), 4, vclefpCode, toInt = True)

# VCLT #0
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp",
               signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp",
               signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp",
                 ("float",), 2, vcltfpCode, toInt = True)
twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp",
                 ("float",), 4, vcltfpCode, toInt = True)
3630
# VSWP: exchange the two operands register by register via a temporary.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp",
                   ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp",
                   ("uint64_t",), 4, vswpCode)

# VTRN: for every element pair, swap the even element of the source with
# the odd element of the destination.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", smallUnsignedTypes,
                   2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", smallUnsignedTypes,
                   4, vtrnCode)
3654
# VUZP: de-interleave.  The even-indexed elements of destReg followed by
# those of srcReg1 end up in destReg; the odd-indexed ones end up in
# srcReg1.  A copy of the original srcReg1 is taken first because srcReg1
# is overwritten while it is still needed.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp",
                   unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp",
                   unsignedTypes, 4, vuzpCode)
3669
# VZIP: interleave.  destReg receives the interleaved lower halves of the
# original destReg and srcReg1; srcReg1 receives the interleaved upper
# halves.  The original destReg is copied into `mid` first because destReg
# is overwritten before its upper half is consumed.
#
# Both loops index with `unsigned`, matching the other element loops in
# these snippets; the second loop previously used `int`, which compared a
# signed index against eCount.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp",
                   unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp",
                   unsignedTypes, 4, vzipCode)
3684
# VMOVN: plain assignment; any narrowing is left to the element types
# chosen by twoRegNarrowMiscInst.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp",
                     smallUnsignedTypes, vmovnCode)

# VDUP (scalar form): copy the selected source element to the destination.
vdupCode = 'destElem = srcElem1;'
twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp",
                 smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp",
                 smallUnsignedTypes, 4, vdupCode)
3691
def vdupGprInst(name, Name, opClass, types, rCount):
    # Generate a VDUP variant that replicates Op1 into every element of
    # the destination vector and writes the vector back through rCount
    # FpDestP<n>_uw operands.  One execute declaration is emitted per
    # entry in `types`.
    global header_output, exec_output
    fillVector = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    writeBack = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = fillVector + "".join(
        [writeBack % { "reg" : idx } for idx in range(rCount)])
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp",
            smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp",
            smallUnsignedTypes, 4)
3718
# One-register immediate forms.  VORR and VBIC read and modify the existing
# destination value; they pass an extra trailing True that VMOV and VMVN
# do not.

# VMOV #imm: load the immediate.
vmovCode = 'destElem = imm;'
oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp",
              ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp",
              ("uint64_t",), 4, vmovCode)

# VORR #imm: OR the immediate into the destination.
vorrCode = 'destElem |= imm;'
oneRegImmInst("vorr", "NVorriD", "SimdAluOp",
              ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", "SimdAluOp",
              ("uint64_t",), 4, vorrCode, True)

# VMVN #imm: load the complemented immediate.
vmvnCode = 'destElem = ~imm;'
oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp",
              ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp",
              ("uint64_t",), 4, vmvnCode)

# VBIC #imm: clear the destination bits that are set in the immediate.
vbicCode = 'destElem &= ~imm;'
oneRegImmInst("vbic", "NVbiciD", "SimdAluOp",
              ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp",
              ("uint64_t",), 4, vbicCode, True)
3734
# VQMOVN (signed): narrow; if widening the result back does not reproduce
# the source, set fpscr.qc and substitute mask(width - 1), complemented
# when the source is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp",
                     smallSignedTypes, vqmovnCode)

# VQMOVUN (unsigned types): narrow; on a round-trip mismatch set fpscr.qc
# and substitute the all-ones mask of the element width.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun", "SimdMiscOp",
                     smallUnsignedTypes, vqmovunCode)

# VQMOVUN (signed types): as above, but a negative source also counts as
# out of range and produces the complemented mask instead.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns", "SimdMiscOp",
                     smallSignedTypes, vqmovunsCode)
3774
def buildVext(name, Name, opClass, types, rCount, op):
    # Generate a three-register-plus-immediate instruction: load both
    # source vectors from rCount FpOp1P<n>_uw / FpOp2P<n>_uw operands, run
    # the caller-supplied `op` snippet, then write destReg back through
    # FpDestP<n>_uw.  One execute declaration is emitted per entry in
    # `types`.
    global header_output, exec_output
    loadPair = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    storeOne = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        # Only the load template is %-formatted; simdEnabledCheckCode is
        # prepended verbatim for every register.
        eWalkCode += simdEnabledCheckCode + (loadPair % { "reg" : idx })
    eWalkCode += op
    eWalkCode += "".join(
        [storeOne % { "reg" : idx } for idx in range(rCount)])
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", params, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst(
            { "targs" : elemType, "class_name" : Name })
3802
# VEXT: destination element i is taken from position i + imm of the
# sequence srcReg1 followed by srcReg2.  An index beyond both vectors
# raises an UndefinedInstruction fault.
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                fault = new UndefinedInstruction(machInst, false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
buildVext("vext", "NVextD", "SimdMiscOp",
          ("uint8_t",), 2, vextCode)
buildVext("vext", "NVextQ", "SimdMiscOp",
          ("uint8_t",), 4, vextCode)
3820
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Generate a byte table lookup instruction.  `length` sets how much of
    # the 32-byte table is loaded from FpOp1P<n>_uw operands (2 * length
    # entries of table.regs; the rest is zeroed).  `isVtbl` is the C++
    # literal "true" or "false": when true an out-of-range index writes 0,
    # otherwise the destination byte is left as it was.
    global header_output, decoder_output, exec_output
    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    loadEntry = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
    zeroEntry = 'table.regs[%(reg)d] = 0;\n'
    code = prologue
    liveRegs = length * 2
    for idx in range(8):
        entry = loadEntry if idx < liveRegs else zeroEntry
        code += entry % { "reg" : idx }
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    params = { "code": code,
               "predicate_test": predicateTest,
               "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)

# VTBL: table lengths 1 through 4.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

# VTBX: table lengths 1 through 4.
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3884}};