neon.isa (10829:1e38e545823b) neon.isa (11443:df24b9af42c7)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061let {{
# Accumulators for generated C++ text. exec_output is seeded below with
# the comparison helpers.
header_output = ""
exec_output = ""

# C++ helpers for the floating-point compares (>, >=, ==).
# Return convention, as written in the helper bodies:
#   0.0 -> the comparison holds
#   1.0 -> the comparison does not hold
#   2.0 -> an operand is NaN (vceqFunc only tests isSnan())
vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''

# C++ helpers for the "less than" style compares (<=, <), using the same
# 0.0 / 1.0 / 2.0 return convention. Defined here but not appended to
# exec_output by the statements in this section.
vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''

# C++ helpers for the absolute-value compares (|op1| > |op2|,
# |op1| >= |op2|), using the same return convention.
vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

# Only the >, >=, == and absolute-value helpers are emitted at this point.
exec_output += vcompares + vacomparesG

# Element-type name tuples used to pick which C++ element types an
# instruction is generated for. "small" means at most 32 bits.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1133
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate an element-wise op on three equally sized NEON operands.

    Appends a "RegRegRegOp" based class to header_output/exec_output using
    NeonRegRegRegOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpOp2P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- load the old destination element into destElem first.
    pairwise   -- feed element i with the adjacent pair (2 * i, 2 * i + 1),
                  taken from srcReg1 first and from srcReg2 afterwards.
    """
    global header_output, exec_output

    code = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''

    # Copy the operand registers (and the destination when it is an input)
    # into the local vectors.
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        code += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst

    loopSubst = {
        "op" : op,
        "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                      if readDest else ''),
    }
    if not pairwise:
        code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % loopSubst
    else:
        code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % loopSubst

    # Write the result vector back to the destination registers.
    for idx in range(rCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code" : code,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1194
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a per-register floating point op on three NEON operands.

    Appends an "FpRegRegRegOp" based class to header_output/exec_output
    using NeonRegRegRegOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- type names used as template arguments.
    rCount     -- number of FpOp1P*/FpOp2P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destReg from srcReg1/srcReg2.
    readDest   -- load the old destination register into destReg first.
    pairwise   -- feed register r with the adjacent pair (2 * r, 2 * r + 1),
                  taken from srcRegs1 first and from srcRegs2 afterwards.
    toInt      -- destReg holds raw register bits (FloatRegBits) kept in a
                  RegVect instead of a FloatReg kept in a float array.
    """
    global header_output, exec_output

    # Everything that depends on the destination representation is chosen
    # here once, so the reads, the loop body and the write-back agree.
    # The integer flavour goes through the _uw operand and RegVect::regs in
    # all three places (the read path used to emit "FpDestP<n>.bits" and
    # "destRegs[r]", neither of which matches a RegVect destination).
    if toInt:
        destDecl = 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs[r]'
        loadDest = 'destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;'
        storeDest = 'FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];'
    else:
        destDecl = 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        destSlot = 'destRegs[r]'
        loadDest = 'destRegs[%(reg)d] = FpDestP%(reg)d;'
        storeDest = 'FpDestP%(reg)d = destRegs[%(reg)d];'

    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    eWalkCode += destDecl
    for reg in range(rCount):
        regSubst = { "reg" : reg }
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regSubst
        if readDest:
            eWalkCode += '\n        ' + loadDest % regSubst + '\n'

    loopSubst = {
        "op" : op,
        "readDest" : ('destReg = %s;' % destSlot) if readDest else '',
        "destType" : destType,
        "writeDest" : '%s = destReg;' % destSlot,
    }
    if pairwise:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % loopSubst
    else:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % loopSubst

    for reg in range(rCount):
        eWalkCode += '\n        ' + storeDest % { "reg" : reg } + '\n'

    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1279
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate a three operand NEON op whose operands differ in width.

    Appends a "RegRegRegOp" based class to header_output/exec_output using
    NeonRegRegRegOpDeclare and NeonUnequalRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    bigSrc1, bigSrc2, bigDest
               -- select the "Big" vector/element types and four registers
                  (instead of two) for the respective operand.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output

    def shape(isBig):
        # (register count, type name prefix) for one operand.
        if isBig:
            return 4, 'Big'
        return 2, ''

    src1Cnt, src1Prefix = shape(bigSrc1)
    src2Cnt, src2Prefix = shape(bigSrc2)
    destCnt, destPrefix = shape(bigDest)

    eWalkCode = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 takes the element type of its own vector (src2Prefix); it
    # used to be declared with src1Prefix, leaving "src2Prefix" unused.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code" : eWalkCode,
                          "r_count" : 2,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1343
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Narrowing form: both sources use the big types, the destination
    uses the regular ones."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1347
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Long form: both sources use the regular types, the destination
    uses the big ones."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1351
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Wide form: the first source and the destination use the big types,
    the second source uses the regular ones."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1355
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON op combining each element of operand 1 with one
    fixed element of operand 2, selected by the immediate.

    Appends a "RegRegRegImmOp" based class to header_output/exec_output
    using NeonRegRegRegImmOpDeclare and NeonEqualRegExecute, followed by
    one NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpOp2P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- load the old destination element into destElem first.

    The generated code raises UndefinedInstruction when imm does not index
    an element of srcReg2.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        regSubst = { "reg" : reg }
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use "||": with "&&" it could never be true and
    # an out-of-range imm would index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1404
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening NEON op combining each element of operand 1
    with one fixed element of operand 2, selected by the immediate.

    Two registers are read per source and four are written for the
    BigRegVect destination. Appends a "RegRegRegImmOp" based class to
    header_output/exec_output using NeonRegRegRegImmOpDeclare and
    NeonUnequalRegExecute, followed by one NeonExecDeclare instantiation
    per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing the BigElement destElem from
                  srcElem1/srcElem2.
    readDest   -- load the old destination element into destElem first.

    The generated code raises UndefinedInstruction when imm does not index
    an element of srcReg2.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use "||": with "&&" it could never be true and
    # an out-of-range imm would index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1456
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a floating point NEON op combining each register of
    operand 1 with one fixed register of operand 2, selected by the
    immediate.

    Appends an "FpRegRegRegImmOp" based class to header_output/exec_output
    using NeonRegRegRegImmOpDeclare and NeonEqualRegExecute, followed by
    one NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- type names used as template arguments.
    rCount     -- number of FpOp1P*/FpOp2P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destReg from srcReg1/srcReg2.
    readDest   -- load the old destination register into destReg first.

    The generated code raises UndefinedInstruction when imm fails the
    range check against eCount.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        regSubst = { "reg" : reg }
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regSubst
        if readDest:
            eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % regSubst
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use "||": with "&&" it could never be true and
    # an out-of-range imm would index past the end of srcRegs2.
    # NOTE(review): the bound is eCount while srcRegs2 holds rCount entries;
    # this presumably relies on eCount == rCount for float elements --
    # confirm against NeonEqualRegExecute.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1506
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a NEON op on one source operand plus an immediate.

    Appends a "RegRegImmOp" based class to header_output/exec_output using
    NeonRegRegImmOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- C++ snippet computing the destination value.
    readDest   -- load the old destination value before running op.
    toInt      -- the destination is handled as raw register bits
                  (FloatRegBits destReg) instead of Element destElem.
    fromInt    -- the source is handled as raw register bits
                  (FloatRegBits srcReg1) instead of Element srcElem1.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for reg in range(rCount):
        regSubst = { "reg" : reg }
        eWalkCode += '''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            eWalkCode += '''
        destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst

    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    if toInt:
        declDest = 'FloatRegBits destReg;'
        loadDest = 'destReg = gtoh(destRegs.regs[i]);'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        loadDest = 'destElem = gtoh(destRegs.elements[i]);'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    # Only load the old destination when the caller asked for it; destRegs
    # is never filled in otherwise, so an unconditional load for toInt
    # would read an uninitialised vector.
    readDestCode = ''
    if readDest:
        readDestCode = loadDest

    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1563
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing NEON op on one source operand plus an immediate.

    Four source registers are read into a BigRegVect and two destination
    registers are written from a RegVect. Appends a "RegRegImmOp" based
    class to header_output/exec_output using NeonRegRegImmOpDeclare and
    NeonUnequalRegExecute, followed by one NeonExecDeclare instantiation
    per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing destElem from the BigElement srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2

    code = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcRegCount):
        code += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    if readDest:
        for idx in range(destRegCount):
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx }

    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(destRegCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code" : code,
                          "r_count" : 2,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1607
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening NEON op on one source operand plus an immediate.

    Two source registers are read into a RegVect and four destination
    registers are written from a BigRegVect. Appends a "RegRegImmOp" based
    class to header_output/exec_output using NeonRegRegImmOpDeclare and
    NeonUnequalRegExecute, followed by one NeonExecDeclare instantiation
    per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing the BigElement destElem from srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value belongs in the per-element temporary destElem; the
        # snippet used to assign to destReg, which is the whole vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code" : eWalkCode,
                          "r_count" : 2,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1651
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate an element-wise NEON op on a single source operand.

    Appends a "RegRegOp" based class to header_output/exec_output using
    NeonRegRegOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output

    code = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        code += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst

    # The result is stored through a separate index j that starts out equal
    # to i -- presumably so that op can redirect the store by changing j.
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(rCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code" : code,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1694
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON op that feeds one source element, selected by the
    immediate, into every destination element.

    Appends a "RegRegImmOp" based class to header_output/exec_output using
    NeonRegRegImmOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output

    code = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        code += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst

    # NOTE(review): imm is used as an index without a range check here;
    # presumably the decoder guarantees it is valid -- confirm.
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(rCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code" : code,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1736
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON op that rewrites both of its register operands.

    Both srcReg1 and destReg are always loaded before op runs and both are
    written back afterwards; op is inserted verbatim and has to do its own
    element walk. Appends a "RegRegOp" based class to
    header_output/exec_output using NeonRegRegOpDeclare and
    NeonEqualRegExecute, followed by one NeonExecDeclare instantiation per
    entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- complete C++ code operating on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility with the other
                  generators; the destination is read unconditionally, so
                  the flag has no effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1771
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Generate a per-register floating point NEON op on a single source.

    Appends an "FpRegRegOp" based class to header_output/exec_output using
    NeonRegRegOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destReg from srcReg1.
    readDest   -- load the old destination register into destReg first.
    toInt      -- destReg holds raw register bits (FloatRegBits) kept in a
                  RegVect instead of a FloatReg kept in a float array.
    """
    global header_output, exec_output

    # Everything that depends on the destination representation is chosen
    # here once, so the reads, the loop body and the write-back agree.
    # The integer flavour goes through the _uw operand and RegVect::regs in
    # all three places (the read path used to emit "FpDestP<n>.bits").
    if toInt:
        destDecl = 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs[r]'
        loadDest = 'destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;'
        storeDest = 'FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];'
    else:
        destDecl = 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        destSlot = 'destRegs[r]'
        loadDest = 'destRegs[%(reg)d] = FpDestP%(reg)d;'
        storeDest = 'FpDestP%(reg)d = destRegs[%(reg)d];'

    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    eWalkCode += destDecl
    for reg in range(rCount):
        regSubst = { "reg" : reg }
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % regSubst
        if readDest:
            eWalkCode += '\n        ' + loadDest % regSubst + '\n'

    # The loop below counts with r; the old-value load used to index with an
    # undeclared i.
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : '%s = destReg;' % destSlot }

    for reg in range(rCount):
        eWalkCode += '\n        ' + storeDest % { "reg" : reg } + '\n'

    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code" : eWalkCode,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1837
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON op that folds each adjacent pair of source elements
    into one BigElement of the destination.

    Appends a "RegRegOp" based class to header_output/exec_output using
    NeonRegRegOpDeclare and NeonUnequalRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpOp1P*/FpDestP* registers transferred.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output

    code = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        code += '''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regSubst

    # Half as many results as source elements: result i is built from
    # source elements 2 * i and 2 * i + 1.
    code += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(rCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code" : code,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1881
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a narrowing element-wise NEON op on a single source.

    Four source registers are read into a BigRegVect and two destination
    registers are written from a RegVect. Appends a "RegRegOp" based class
    to header_output/exec_output using NeonRegRegOpDeclare and
    NeonUnequalRegExecute, followed by one NeonExecDeclare instantiation
    per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing destElem from the BigElement srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2

    code = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcRegCount):
        code += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx }
    if readDest:
        for idx in range(destRegCount):
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx }

    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(destRegCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code" : code,
                          "r_count" : 2,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1925
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a NEON op that has only a destination operand and an
    immediate.

    Appends a "RegImmOp" based class to header_output/exec_output using
    NeonRegImmOpDeclare and NeonEqualRegExecute, followed by one
    NeonExecDeclare instantiation per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    rCount     -- number of FpDestP* registers transferred.
    op         -- C++ snippet computing destElem.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output

    code = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    if readDest:
        for idx in range(rCount):
            code += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx }

    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op,
            "readDest" : ('destElem = gtoh(destReg.elements[i]);'
                          if readDest else '') }
    for idx in range(rCount):
        code += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }

    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code" : code,
                          "r_count" : rCount,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
1963
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Generate a widening element-wise NEON op on a single source.

    Two source registers are read into a RegVect and four destination
    registers are written from a BigRegVect. Appends a "RegRegOp" based
    class to header_output/exec_output using NeonRegRegOpDeclare and
    NeonUnequalRegExecute, followed by one NeonExecDeclare instantiation
    per entry of types.

    name, Name -- mnemonic and generated class name.
    opClass    -- value stored under "op_class" in the InstObjParams.
    types      -- element type names used as template arguments.
    op         -- C++ snippet computing the BigElement destElem from srcElem1.
    readDest   -- load the old destination element into destElem first.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The old value belongs in the per-element temporary destElem; the
        # snippet used to assign to destReg, which is the whole vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code" : eWalkCode,
                          "r_count" : 2,
                          "predicate_test" : predicateTest,
                          "op_class" : opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for targ in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targ,
                                               "class_name" : Name })
2007
# Halving add: each input has its low bit masked off and is halved, and the
# carry produced by the two low bits is added back in.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''

# Rounding halving add: as above, with an extra 1 folded into the carry.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''

# Halving subtract: the halved inputs are subtracted and the borrow produced
# by the two low bits is taken off the result.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''

# One two-register (D) and one four-register (Q) class per mnemonic, in the
# order vhadd, vrhadd, vhsub.
for halvingName, halvingCode in (("vhadd", vhaddCode),
                                 ("vrhadd", vrhaddCode),
                                 ("vhsub", vhsubCode)):
    for regSuffix, regCount in (("D", 2), ("Q", 4)):
        threeEqualRegInst(halvingName, halvingName.capitalize() + regSuffix,
                          "SimdAddOp", allTypes, regCount, halvingCode)
2045
# Bitwise operations. Each snippet works on one element; vbif, vbit and
# vbsl also take the old destination element as an input.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''

# (mnemonic, class name stem, op class, snippet, reads destination).
# vmov is generated from the vorr snippet.
for bitName, bitStem, bitOpClass, bitCode, bitReadsDest in (
        ("vand", "Vand", "SimdAluOp", vandCode, False),
        ("vbic", "Vbic", "SimdAluOp", vbicCode, False),
        ("vorr", "Vorr", "SimdAluOp", vorrCode, False),
        ("vmov", "Vmov", "SimdMiscOp", vorrCode, False),
        ("vorn", "Vorn", "SimdAluOp", vornCode, False),
        ("veor", "Veor", "SimdAluOp", veorCode, False),
        ("vbif", "Vbif", "SimdAluOp", vbifCode, True),
        ("vbit", "Vbit", "SimdAluOp", vbitCode, True),
        ("vbsl", "Vbsl", "SimdAluOp", vbslCode, True)):
    for regSuffix, regCount in (("D", 2), ("Q", 4)):
        threeEqualRegInst(bitName, bitStem + regSuffix, bitOpClass,
                          unsignedTypes, regCount, bitCode, bitReadsDest)
2094
# vmax: picks the larger of the two source elements (srcElem2 when they are
# equal). This snippet is also reused by the pairwise vpmax registration.
2095 vmaxCode = '''
2096 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2097 '''
2098 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2099 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2100
# vmin: picks the smaller of the two source elements (srcElem2 when they are
# equal). This snippet is also reused by the pairwise vpmin registration.
2101 vminCode = '''
2102 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2103 '''
2104 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2105 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
# vadd: plain element-wise sum at Element width, with no saturation check.
2107 vaddCode = '''
2108 destElem = srcElem1 + srcElem2;
2109 '''
2110 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2111 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2112
# vpadd reuses the same sum; pairwise=True is the only difference in the
# registration, and only a D-sized form is registered here.
2113 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2114 2, vaddCode, pairwise=True)
# vaddl / vaddw: both operands are cast to BigElement before adding, so the
# sum is formed at the wider width. The same snippet serves the long and the
# wide generator.
2115 vaddlwCode = '''
2116 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2117 '''
2118 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2119 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: forms the sum at BigElement width and shifts it right by the bit
# width of Element, i.e. keeps the high half of the wide sum.
2120 vaddhnCode = '''
2121 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2122 (sizeof(Element) * 8);
2123 '''
2124 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: same as vaddhn, but adds 1 << (Element bits - 1) to the wide sum
# before the shift, which rounds the retained high half.
2125 vraddhnCode = '''
2126 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2127 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2128 (sizeof(Element) * 8);
2129 '''
2130 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
# vsub: plain element-wise difference at Element width, with no saturation
# check.
2132 vsubCode = '''
2133 destElem = srcElem1 - srcElem2;
2134 '''
2135 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2136 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both operands are cast to BigElement before subtracting. The
# same snippet serves the long and the wide generator.
2137 vsublwCode = '''
2138 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2139 '''
2140 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2141 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
# vqadd (unsigned): adds at Element width, then detects wrap-around by
# checking whether the sum is smaller than either operand. On wrap the result
# is clamped to (Element)(-1) and fpscr.qc is set; FpscrQc is read before and
# written back after the check.
2143 vqaddUCode = '''
2144 destElem = srcElem1 + srcElem2;
2145 FPSCR fpscr = (FPSCR) FpscrQc;
2146 if (destElem < srcElem1 || destElem < srcElem2) {
2147 destElem = (Element)(-1);
2148 fpscr.qc = 1;
2149 }
2150 FpscrQc = fpscr;
2151 '''
2152 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2153 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: forms the difference at BigElement width and shifts it right by the
# bit width of Element, i.e. keeps the high half of the wide difference.
2154 vsubhnCode = '''
2155 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2156 (sizeof(Element) * 8);
2157 '''
2158 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: same as vsubhn, but adds 1 << (Element bits - 1) to the wide
# difference before the shift, which rounds the retained high half.
2159 vrsubhnCode = '''
2160 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2161 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2162 (sizeof(Element) * 8);
2163 '''
2164 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
# vqadd (signed): adds at Element width and flags overflow when both operands
# share a sign that the sum does not. The clamp starts from the top-bit-only
# pattern (most negative value); if the wrapped sum came out negative (the
# operands were non-negative) 1 is subtracted, turning it into the most
# positive value. fpscr.qc is set on saturation.
2166 vqaddSCode = '''
2167 destElem = srcElem1 + srcElem2;
2168 FPSCR fpscr = (FPSCR) FpscrQc;
2169 bool negDest = (destElem < 0);
2170 bool negSrc1 = (srcElem1 < 0);
2171 bool negSrc2 = (srcElem2 < 0);
2172 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2173 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2174 if (negDest)
2175 destElem -= 1;
2176 fpscr.qc = 1;
2177 }
2178 FpscrQc = fpscr;
2179 '''
2180 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2181 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2182
# vqsub (unsigned): subtracts at Element width; a difference larger than the
# minuend means the subtraction wrapped, in which case the result is clamped
# to 0 and fpscr.qc is set.
2183 vqsubUCode = '''
2184 destElem = srcElem1 - srcElem2;
2185 FPSCR fpscr = (FPSCR) FpscrQc;
2186 if (destElem > srcElem1) {
2187 destElem = 0;
2188 fpscr.qc = 1;
2189 }
2190 FpscrQc = fpscr;
2191 '''
2192 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2193 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2194
# vqsub (signed): subtracts at Element width and flags overflow when the
# operands have opposite signs (negSrc1 == posSrc2) and the difference does
# not carry the sign of srcElem1. The clamp starts from the top-bit-only
# pattern (most negative value) and becomes the most positive value when the
# wrapped difference came out negative. fpscr.qc is set on saturation.
2195 vqsubSCode = '''
2196 destElem = srcElem1 - srcElem2;
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 bool negDest = (destElem < 0);
2199 bool negSrc1 = (srcElem1 < 0);
2200 bool posSrc2 = (srcElem2 >= 0);
2201 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2202 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2203 if (negDest)
2204 destElem -= 1;
2205 fpscr.qc = 1;
2206 }
2207 FpscrQc = fpscr;
2208 '''
2209 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2210 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2211
# Integer compares. Each snippet writes (Element)(-1) when the comparison
# holds and 0 otherwise.
#
# vcgt: srcElem1 > srcElem2.
2212 vcgtCode = '''
2213 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2214 '''
2215 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2216 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2217
# vcge: srcElem1 >= srcElem2.
2218 vcgeCode = '''
2219 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2220 '''
2221 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2222 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2223
# vceq: srcElem1 == srcElem2. Registered for unsignedTypes only, unlike the
# two ordered compares above which use allTypes.
2224 vceqCode = '''
2225 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2226 '''
2227 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2228 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2229
# vshl (register shift): the shift count is the low byte of srcElem2
# interpreted as signed. A negative count shifts right by its magnitude; a
# magnitude of at least the element width yields 0 and shiftAmt is pinned to
# width - 1 so that the sign-fill mask below covers the whole element.
# Sign bits are OR-ed in by hand when ltz(srcElem1) holds but the shifted
# value is not negative. A non-negative count shifts left, producing 0 once
# the count reaches the element width.
2230 vshlCode = '''
2231 int16_t shiftAmt = (int8_t)srcElem2;
2232 if (shiftAmt < 0) {
2233 shiftAmt = -shiftAmt;
2234 if (shiftAmt >= sizeof(Element) * 8) {
2235 shiftAmt = sizeof(Element) * 8 - 1;
2236 destElem = 0;
2237 } else {
2238 destElem = (srcElem1 >> shiftAmt);
2239 }
2240 // Make sure the right shift sign extended when it should.
2241 if (ltz(srcElem1) && !ltz(destElem)) {
2242 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2243 1 - shiftAmt));
2244 }
2245 } else {
2246 if (shiftAmt >= sizeof(Element) * 8) {
2247 destElem = 0;
2248 } else {
2249 destElem = srcElem1 << shiftAmt;
2250 }
2251 }
2252 '''
2253 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2254 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2255
# vrshl (rounding register shift): same count decoding as vshl. For a right
# shift the rounding bit rBit is the last bit shifted out (bit shiftAmt - 1)
# while the magnitude does not exceed the element width; beyond the width it
# is forced to 1 when ltz(srcElem1) holds, otherwise it stays 0. rBit is
# added after the sign fill. Positive counts shift left (0 once the count
# reaches the width) and a zero count copies srcElem1.
# NOTE(review): registered under "SimdAluOp" whereas vshl uses "SimdShiftOp"
# -- confirm the op class is intentional.
2256 vrshlCode = '''
2257 int16_t shiftAmt = (int8_t)srcElem2;
2258 if (shiftAmt < 0) {
2259 shiftAmt = -shiftAmt;
2260 Element rBit = 0;
2261 if (shiftAmt <= sizeof(Element) * 8)
2262 rBit = bits(srcElem1, shiftAmt - 1);
2263 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2264 rBit = 1;
2265 if (shiftAmt >= sizeof(Element) * 8) {
2266 shiftAmt = sizeof(Element) * 8 - 1;
2267 destElem = 0;
2268 } else {
2269 destElem = (srcElem1 >> shiftAmt);
2270 }
2271 // Make sure the right shift sign extended when it should.
2272 if (ltz(srcElem1) && !ltz(destElem)) {
2273 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2274 1 - shiftAmt));
2275 }
2276 destElem += rBit;
2277 } else if (shiftAmt > 0) {
2278 if (shiftAmt >= sizeof(Element) * 8) {
2279 destElem = 0;
2280 } else {
2281 destElem = srcElem1 << shiftAmt;
2282 }
2283 } else {
2284 destElem = srcElem1;
2285 }
2286 '''
2287 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2288 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2289
# vqshl (unsigned, register shift): negative counts shift right (0 once the
# magnitude reaches the element width). Positive counts shift left with
# saturation: if the count reaches the width and the source is non-zero, or
# if any of the top shiftAmt bits of the source are set, the result becomes
# mask(element width) and fpscr.qc is set. A zero count copies srcElem1.
2290 vqshlUCode = '''
2291 int16_t shiftAmt = (int8_t)srcElem2;
2292 FPSCR fpscr = (FPSCR) FpscrQc;
2293 if (shiftAmt < 0) {
2294 shiftAmt = -shiftAmt;
2295 if (shiftAmt >= sizeof(Element) * 8) {
2296 shiftAmt = sizeof(Element) * 8 - 1;
2297 destElem = 0;
2298 } else {
2299 destElem = (srcElem1 >> shiftAmt);
2300 }
2301 } else if (shiftAmt > 0) {
2302 if (shiftAmt >= sizeof(Element) * 8) {
2303 if (srcElem1 != 0) {
2304 destElem = mask(sizeof(Element) * 8);
2305 fpscr.qc = 1;
2306 } else {
2307 destElem = 0;
2308 }
2309 } else {
2310 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2311 sizeof(Element) * 8 - shiftAmt)) {
2312 destElem = mask(sizeof(Element) * 8);
2313 fpscr.qc = 1;
2314 } else {
2315 destElem = srcElem1 << shiftAmt;
2316 }
2317 }
2318 } else {
2319 destElem = srcElem1;
2320 }
2321 FpscrQc = fpscr;
2322 '''
2323 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2324 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2325
# vqshl (signed, register shift): negative counts shift right with a manual
# sign fill. Positive counts shift left with saturation: the shift saturates
# when the count reaches the element width and the source is non-zero, or
# when the top shiftAmt + 1 bits of the source are not all copies of the
# sign (all ones for a negative source, all zeros otherwise). On saturation
# fpscr.qc is set and the result is mask(width - 1), complemented for a
# negative source. A zero count copies srcElem1.
# NOTE(review): registered under "SimdCmpOp" while the unsigned vqshl forms
# use "SimdAluOp" -- confirm the op class is intentional.
2326 vqshlSCode = '''
2327 int16_t shiftAmt = (int8_t)srcElem2;
2328 FPSCR fpscr = (FPSCR) FpscrQc;
2329 if (shiftAmt < 0) {
2330 shiftAmt = -shiftAmt;
2331 if (shiftAmt >= sizeof(Element) * 8) {
2332 shiftAmt = sizeof(Element) * 8 - 1;
2333 destElem = 0;
2334 } else {
2335 destElem = (srcElem1 >> shiftAmt);
2336 }
2337 // Make sure the right shift sign extended when it should.
2338 if (srcElem1 < 0 && destElem >= 0) {
2339 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2340 1 - shiftAmt));
2341 }
2342 } else if (shiftAmt > 0) {
2343 bool sat = false;
2344 if (shiftAmt >= sizeof(Element) * 8) {
2345 if (srcElem1 != 0)
2346 sat = true;
2347 else
2348 destElem = 0;
2349 } else {
2350 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2351 sizeof(Element) * 8 - 1 - shiftAmt) !=
2352 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2353 sat = true;
2354 } else {
2355 destElem = srcElem1 << shiftAmt;
2356 }
2357 }
2358 if (sat) {
2359 fpscr.qc = 1;
2360 destElem = mask(sizeof(Element) * 8 - 1);
2361 if (srcElem1 < 0)
2362 destElem = ~destElem;
2363 }
2364 } else {
2365 destElem = srcElem1;
2366 }
2367 FpscrQc = fpscr;
2368 '''
2369 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2370 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2371
# vqrshl (unsigned, register shift): the shift count is the low byte of
# srcElem2 interpreted as signed.
#   * negative count: rounding right shift. rBit is the last bit shifted out
#     (bit shiftAmt - 1) while the magnitude does not exceed the element
#     width, otherwise 0; a magnitude of at least the width leaves only rBit.
#   * positive count: left shift that saturates to mask(element width) and
#     sets fpscr.qc when the count reaches the width with a non-zero source,
#     or when any of the top shiftAmt source bits are set.
#   * zero count: the source is copied unchanged.
# The zero case is handled explicitly, matching vqshlUCode and vqrshlSCode.
# Previously it fell into the left-shift branch and called bits() with the
# range [width - 1 : width], whose high index is below its low index.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        // A zero shift neither rounds nor saturates.
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2409
# vqrshl (signed, register shift): combines the rounding right shift of vrshl
# with the saturating left shift of the signed vqshl.
#   * negative count: rBit is bit shiftAmt - 1 of the source while the
#     magnitude does not exceed the element width, and is forced to 1 for a
#     negative source beyond the width; it is added after the manual sign
#     fill.
#   * positive count: saturates (fpscr.qc = 1, result mask(width - 1) or its
#     complement for a negative source) when the count reaches the width with
#     a non-zero source, or when the top shiftAmt + 1 bits are not all copies
#     of the sign.
#   * zero count: the source is copied unchanged.
2410 vqrshlSCode = '''
2411 int16_t shiftAmt = (int8_t)srcElem2;
2412 FPSCR fpscr = (FPSCR) FpscrQc;
2413 if (shiftAmt < 0) {
2414 shiftAmt = -shiftAmt;
2415 Element rBit = 0;
2416 if (shiftAmt <= sizeof(Element) * 8)
2417 rBit = bits(srcElem1, shiftAmt - 1);
2418 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2419 rBit = 1;
2420 if (shiftAmt >= sizeof(Element) * 8) {
2421 shiftAmt = sizeof(Element) * 8 - 1;
2422 destElem = 0;
2423 } else {
2424 destElem = (srcElem1 >> shiftAmt);
2425 }
2426 // Make sure the right shift sign extended when it should.
2427 if (srcElem1 < 0 && destElem >= 0) {
2428 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2429 1 - shiftAmt));
2430 }
2431 destElem += rBit;
2432 } else if (shiftAmt > 0) {
2433 bool sat = false;
2434 if (shiftAmt >= sizeof(Element) * 8) {
2435 if (srcElem1 != 0)
2436 sat = true;
2437 else
2438 destElem = 0;
2439 } else {
2440 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2441 sizeof(Element) * 8 - 1 - shiftAmt) !=
2442 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2443 sat = true;
2444 } else {
2445 destElem = srcElem1 << shiftAmt;
2446 }
2447 }
2448 if (sat) {
2449 fpscr.qc = 1;
2450 destElem = mask(sizeof(Element) * 8 - 1);
2451 if (srcElem1 < 0)
2452 destElem = ~destElem;
2453 }
2454 } else {
2455 destElem = srcElem1;
2456 }
2457 FpscrQc = fpscr;
2458 '''
2459 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2460 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2461
# vaba: adds the absolute difference of the two sources to the existing
# destination element. The larger operand is always the minuend, so the
# difference is formed without negation.
2462 vabaCode = '''
2463 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2464 (srcElem2 - srcElem1);
2465 '''
2466 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2467 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: long form of vaba; the operands are compared at Element width but
# subtracted after being cast to BigElement.
2468 vabalCode = '''
2469 destElem += (srcElem1 > srcElem2) ?
2470 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2471 ((BigElement)srcElem2 - (BigElement)srcElem1);
2472 '''
2473 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2474
# vabd: absolute difference of the two sources; the larger operand is always
# the minuend.
2475 vabdCode = '''
2476 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2477 (srcElem2 - srcElem1);
2478 '''
2479 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2480 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: long form of vabd; the subtraction is done on BigElement casts of
# the operands.
2481 vabdlCode = '''
2482 destElem = (srcElem1 > srcElem2) ?
2483 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2484 ((BigElement)srcElem2 - (BigElement)srcElem1);
2485 '''
2486 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2487
# vtst: writes (Element)(-1) when the two source elements share at least one
# set bit, and 0 otherwise.
2488 vtstCode = '''
2489 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2490 '''
2491 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2492 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2493
# vmul: element-wise product at Element width. The snippet is reused by the
# by-scalar vmul registrations later in this file.
2494 vmulCode = '''
2495 destElem = srcElem1 * srcElem2;
2496 '''
2497 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2498 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: long multiply; both operands are cast to BigElement before the
# multiplication.
2499 vmullCode = '''
2500 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2501 '''
2502 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2503
# vmla: multiply-accumulate; the product of the sources is added to the
# existing destination element.
2504 vmlaCode = '''
2505 destElem = destElem + srcElem1 * srcElem2;
2506 '''
2507 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2508 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: long multiply-accumulate; the product is formed from BigElement
# casts of the operands.
2509 vmlalCode = '''
2510 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2511 '''
2512 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2513
# vqdmlal: signed saturating doubling multiply-accumulate long.
# Step 1 forms midElem = 2 * srcElem1 * srcElem2 and saturates it to the
# largest positive BigElement when it cannot be represented.
# Step 2 adds midElem to the existing destination and saturates the sum.
# Either saturation sets fpscr.qc.
#
# The doubled product only overflows BigElement when both inputs are the most
# negative Element value (2 * 2^(n-1) * 2^(n-1) = 2^(2n-1)). The earlier
# version also saturated the halfNeg * maxNeg pairs, whose doubled product is
# 2^(2n-2) and fits; vqdmullCode in this file tests only the maxNeg pair for
# the same product.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    // The accumulate overflowed if both addends share a sign that the sum
    // lost; clamp towards the sign of the original destination.
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2538
# vqdmlsl: signed saturating doubling multiply-subtract long.
# Step 1 forms midElem = 2 * srcElem1 * srcElem2 and saturates it to the
# largest positive BigElement when it cannot be represented.
# Step 2 subtracts midElem from the existing destination and saturates the
# difference. Either saturation sets fpscr.qc.
#
# The doubled product only overflows BigElement when both inputs are the most
# negative Element value (2 * 2^(n-1) * 2^(n-1) = 2^(2n-1)). The earlier
# version also saturated the halfNeg * maxNeg pairs, whose doubled product is
# 2^(2n-2) and fits; vqdmullCode in this file tests only the maxNeg pair for
# the same product.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    // posMid is the sign of the negated product, i.e. of the value that is
    // effectively being added to the destination.
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2563
# vqdmull: signed saturating doubling multiply long. The doubled product is
# computed through int64_t casts. When both sources equal the top-bit-only
# Element pattern (the most negative value), the result is replaced by the
# complement of that pattern shifted up by the Element width, and fpscr.qc is
# set.
2564 vqdmullCode = '''
2565 FPSCR fpscr = (FPSCR) FpscrQc;
2566 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2567 if (srcElem1 == srcElem2 &&
2568 srcElem1 == (Element)((Element)1 <<
2569 (Element)(sizeof(Element) * 8 - 1))) {
2570 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2571 fpscr.qc = 1;
2572 }
2573 FpscrQc = fpscr;
2574 '''
2575 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2576
# vmls: multiply-subtract; the product of the sources is subtracted from the
# existing destination element.
2577 vmlsCode = '''
2578 destElem = destElem - srcElem1 * srcElem2;
2579 '''
2580 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2581 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: long multiply-subtract; the product is formed from BigElement casts
# of the operands.
2582 vmlslCode = '''
2583 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2584 '''
2585 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2586
# vmul (polynomial): carry-less multiply. For every set bit j of srcElem2 the
# first source shifted left by j is XOR-ed into the result; bits shifted past
# the Element width are lost.
2587 vmulpCode = '''
2588 destElem = 0;
2589 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2590 if (bits(srcElem2, j))
2591 destElem ^= srcElem1 << j;
2592 }
2593 '''
2594 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2595 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# vmull (polynomial): same carry-less multiply, but the shifted operand is
# first cast to BigElement so the full-width product is kept.
2596 vmullpCode = '''
2597 destElem = 0;
2598 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2599 if (bits(srcElem2, j))
2600 destElem ^= (BigElement)srcElem1 << j;
2601 }
2602 '''
2603 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2604
# Pairwise integer max/min: reuse the vmaxCode / vminCode snippets with
# pairwise=True. Only D-sized forms over smallTypes are registered here.
2605 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2606
2607 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2608
# vqdmulh: signed saturating doubling multiply returning the high half. The
# doubled product is formed through int64_t casts and shifted right by the
# Element width. When both sources equal the top-bit-only pattern (most
# negative value) the result is replaced by its complement and fpscr.qc is
# set. The snippet is reused by the by-scalar vqdmulh registrations.
2609 vqdmulhCode = '''
2610 FPSCR fpscr = (FPSCR) FpscrQc;
2611 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2612 (sizeof(Element) * 8);
2613 if (srcElem1 == srcElem2 &&
2614 srcElem1 == (Element)((Element)1 <<
2615 (sizeof(Element) * 8 - 1))) {
2616 destElem = ~srcElem1;
2617 fpscr.qc = 1;
2618 }
2619 FpscrQc = fpscr;
2620 '''
2621 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2622 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2623
# vqrdmulh: signed saturating rounding doubling multiply returning the high
# half. The doubled product plus the rounding constant 1 << (n - 1) is formed
# through int64_t and shifted right by the Element width n.
#
# The rounded result only exceeds the Element range when both inputs are the
# most negative value, so that is the only case that saturates; it yields the
# most positive value and sets fpscr.qc. The earlier version also treated the
# halfNeg * maxNeg pairs as saturating and replaced their representable
# result (2^(n-2)) with the most negative value. The saturated value is now
# written directly instead of being derived from the sign of the wrapped
# intermediate.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2647
# vmax (float): processNans() gets the first chance to produce the result and
# reports through `done` whether it did. If it did not, the value comes from
# binaryOp() with fpMax<float>. If it did and flushToZero() returns true for
# the sources, fpscr.idc is set. FpscrExc is read before and written back
# after. The snippet is reused by the pairwise vpmax float forms.
2648 vmaxfpCode = '''
2649 FPSCR fpscr = (FPSCR) FpscrExc;
2650 bool done;
2651 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2652 if (!done) {
2653 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2654 true, true, VfpRoundNearest);
2655 } else if (flushToZero(srcReg1, srcReg2)) {
2656 fpscr.idc = 1;
2657 }
2658 FpscrExc = fpscr;
2659 '''
2660 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2661 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2662
# vmin (float): identical structure to the float vmax snippet, with
# fpMin<float> as the operation. processNans() may produce the result itself
# (signalled through `done`); in that case fpscr.idc is set when
# flushToZero() returns true for the sources. The snippet is reused by the
# pairwise vpmin float forms.
2663 vminfpCode = '''
2664 FPSCR fpscr = (FPSCR) FpscrExc;
2665 bool done;
2666 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2667 if (!done) {
2668 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2669 true, true, VfpRoundNearest);
2670 } else if (flushToZero(srcReg1, srcReg2)) {
2671 fpscr.idc = 1;
2672 }
2673 FpscrExc = fpscr;
2674 '''
2675 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2676 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2677
# Pairwise float max/min: the vmaxfpCode / vminfpCode snippets registered
# again with pairwise=True, in both D and Q sizes.
2678 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2679 2, vmaxfpCode, pairwise=True)
2680 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2681 4, vmaxfpCode, pairwise=True)
2682
2683 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2684 2, vminfpCode, pairwise=True)
2685 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2686 4, vminfpCode, pairwise=True)
2687
# vadd (float): single binaryOp() call with fpAddS and VfpRoundNearest;
# FpscrExc is read into a local FPSCR, passed to binaryOp() and written back.
2688 vaddfpCode = '''
2689 FPSCR fpscr = (FPSCR) FpscrExc;
2690 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2691 true, true, VfpRoundNearest);
2692 FpscrExc = fpscr;
2693 '''
2694 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2695 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2696
# vpadd (float) reuses the same snippet with pairwise=True.
2697 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2698 2, vaddfpCode, pairwise=True)
2699 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2700 4, vaddfpCode, pairwise=True)
2701
# vsub (float): single binaryOp() call with fpSubS and VfpRoundNearest;
# FpscrExc is read into a local FPSCR, passed to binaryOp() and written back.
2702 vsubfpCode = '''
2703 FPSCR fpscr = (FPSCR) FpscrExc;
2704 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2705 true, true, VfpRoundNearest);
2706 FpscrExc = fpscr;
2707 '''
2708 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2709 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2710
# vmul (float): single binaryOp() call with fpMulS and VfpRoundNearest. The
# snippet is reused by the by-scalar float vmul registrations.
2711 vmulfpCode = '''
2712 FPSCR fpscr = (FPSCR) FpscrExc;
2713 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2714 true, true, VfpRoundNearest);
2715 FpscrExc = fpscr;
2716 '''
2717 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2718 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2719
# vmla (float): multiply-accumulate done as two separate binaryOp() calls --
# first the product (fpMulS) into `mid`, then mid + destReg (fpAddS) -- both
# sharing the same local fpscr. Contrast with vfma below in the file, which
# uses one ternaryOp() call.
2720 vmlafpCode = '''
2721 FPSCR fpscr = (FPSCR) FpscrExc;
2722 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2723 true, true, VfpRoundNearest);
2724 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2725 true, true, VfpRoundNearest);
2726 FpscrExc = fpscr;
2727 '''
2728 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2729 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2730
# vfma (float): multiply-add issued as a single ternaryOp() call with
# fpMulAdd<float>, taking srcReg1, srcReg2 and the existing destReg.
2731 vfmafpCode = '''
2732 FPSCR fpscr = (FPSCR) FpscrExc;
2733 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2734 true, true, VfpRoundNearest);
2735 FpscrExc = fpscr;
2736 '''
2737 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2738 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2739
# vfms (float): same ternaryOp() call as vfma, except that the first
# multiplicand is negated (-srcReg1) before being passed in.
2740 vfmsfpCode = '''
2741 FPSCR fpscr = (FPSCR) FpscrExc;
2742 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2743 true, true, VfpRoundNearest);
2744 FpscrExc = fpscr;
2745 '''
2746 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2747 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2748
# vmls (float): multiply-subtract done as two separate binaryOp() calls --
# first the product (fpMulS) into `mid`, then destReg - mid (fpSubS) -- both
# sharing the same local fpscr.
2749 vmlsfpCode = '''
2750 FPSCR fpscr = (FPSCR) FpscrExc;
2751 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2752 true, true, VfpRoundNearest);
2753 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2754 true, true, VfpRoundNearest);
2755 FpscrExc = fpscr;
2756 '''
2757 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2758 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2759
# vcgt (float): the comparison itself is delegated to vcgtFunc through
# binaryOp(). A returned 0 is turned into -1 in the destination, any other
# value into 0, and a returned 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 encoding is defined by vcgtFunc, which is not
# visible here -- confirm its contract before changing these checks.
2760 vcgtfpCode = '''
2761 FPSCR fpscr = (FPSCR) FpscrExc;
2762 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2763 true, true, VfpRoundNearest);
2764 destReg = (res == 0) ? -1 : 0;
2765 if (res == 2.0)
2766 fpscr.ioc = 1;
2767 FpscrExc = fpscr;
2768 '''
2769 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2770 2, vcgtfpCode, toInt = True)
2771 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2772 4, vcgtfpCode, toInt = True)
2773
# vcge (float): the comparison is delegated to vcgeFunc through binaryOp().
# A returned 0 is turned into -1 in the destination, any other value into 0,
# and a returned 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 encoding is defined by vcgeFunc, which is not
# visible here -- confirm its contract before changing these checks.
2774 vcgefpCode = '''
2775 FPSCR fpscr = (FPSCR) FpscrExc;
2776 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2777 true, true, VfpRoundNearest);
2778 destReg = (res == 0) ? -1 : 0;
2779 if (res == 2.0)
2780 fpscr.ioc = 1;
2781 FpscrExc = fpscr;
2782 '''
2783 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2784 2, vcgefpCode, toInt = True)
2785 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2786 4, vcgefpCode, toInt = True)
2787
# vacgt (float): the comparison is delegated to vacgtFunc through binaryOp().
# A returned 0 is turned into -1 in the destination, any other value into 0,
# and a returned 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 encoding is defined by vacgtFunc, which is not
# visible here -- confirm its contract before changing these checks.
2788 vacgtfpCode = '''
2789 FPSCR fpscr = (FPSCR) FpscrExc;
2790 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2791 true, true, VfpRoundNearest);
2792 destReg = (res == 0) ? -1 : 0;
2793 if (res == 2.0)
2794 fpscr.ioc = 1;
2795 FpscrExc = fpscr;
2796 '''
2797 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2798 2, vacgtfpCode, toInt = True)
2799 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2800 4, vacgtfpCode, toInt = True)
2801
# vacge (float): the comparison is delegated to vacgeFunc through binaryOp().
# A returned 0 is turned into -1 in the destination, any other value into 0,
# and a returned 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 encoding is defined by vacgeFunc, which is not
# visible here -- confirm its contract before changing these checks.
2802 vacgefpCode = '''
2803 FPSCR fpscr = (FPSCR) FpscrExc;
2804 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2805 true, true, VfpRoundNearest);
2806 destReg = (res == 0) ? -1 : 0;
2807 if (res == 2.0)
2808 fpscr.ioc = 1;
2809 FpscrExc = fpscr;
2810 '''
2811 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2812 2, vacgefpCode, toInt = True)
2813 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2814 4, vacgefpCode, toInt = True)
2815
# vceq (float): the comparison is delegated to vceqFunc through binaryOp().
# A returned 0 is turned into -1 in the destination, any other value into 0,
# and a returned 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 encoding is defined by vceqFunc, which is not
# visible here -- confirm its contract before changing these checks.
2816 vceqfpCode = '''
2817 FPSCR fpscr = (FPSCR) FpscrExc;
2818 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2819 true, true, VfpRoundNearest);
2820 destReg = (res == 0) ? -1 : 0;
2821 if (res == 2.0)
2822 fpscr.ioc = 1;
2823 FpscrExc = fpscr;
2824 '''
2825 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2826 2, vceqfpCode, toInt = True)
2827 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2828 4, vceqfpCode, toInt = True)
2829
# vrecps (float): single binaryOp() call with fpRecpsS as the operation; the
# arithmetic itself lives in that helper, which is not defined in this part
# of the file.
2830 vrecpsCode = '''
2831 FPSCR fpscr = (FPSCR) FpscrExc;
2832 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2833 true, true, VfpRoundNearest);
2834 FpscrExc = fpscr;
2835 '''
2836 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2837 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2838
# vrsqrts (float): single binaryOp() call with fpRSqrtsS as the operation;
# the arithmetic itself lives in that helper, which is not defined in this
# part of the file.
2839 vrsqrtsCode = '''
2840 FPSCR fpscr = (FPSCR) FpscrExc;
2841 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2842 true, true, VfpRoundNearest);
2843 FpscrExc = fpscr;
2844 '''
2845 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2846 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2847
# vabd (float): subtracts through binaryOp() with fpSubS, then takes fabs()
# of the difference.
2848 vabdfpCode = '''
2849 FPSCR fpscr = (FPSCR) FpscrExc;
2850 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2851 true, true, VfpRoundNearest);
2852 destReg = fabs(mid);
2853 FpscrExc = fpscr;
2854 '''
2855 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2856 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2857
# Second registration pass for the multiply family. These go through the
# twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst generators and reuse
# the snippets defined for the three-register forms; the class names carry an
# extra "s" (VmlasD, Vmulls, ...).
# NOTE(review): presumably these are the by-scalar encodings -- confirm
# against the generator definitions.
#
# vmla / vmlal.
# NOTE(review): integer vmla is registered for unsignedTypes here while vmls
# and vmul below use allTypes -- confirm this matches the decoder.
2858 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2859 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2860 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2861 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2862 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2863
# vmls / vmlsl.
2864 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2865 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2866 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2867 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2868 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2869
# vmul / vmull.
2870 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2871 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2872 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2873 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2874 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2875
# Saturating doubling multiplies: vqdmull, vqdmlal, vqdmlsl, vqdmulh and
# vqrdmulh.
2876 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2877 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2878 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2879 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2880 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2881 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2882 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2883 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2884 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2885
# vshr (immediate): right shift by imm. When imm is at least the element
# width the result is -1 if ltz(srcElem1) holds and 0 otherwise; for smaller
# counts the shift is applied directly.
2886 vshrCode = '''
2887 if (imm >= sizeof(srcElem1) * 8) {
2888 if (ltz(srcElem1))
2889 destElem = -1;
2890 else
2891 destElem = 0;
2892 } else {
2893 destElem = srcElem1 >> imm;
2894 }
2895 '''
2896 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2897 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2898
# vsra (immediate): shift right and accumulate. The shifted source is built
# in `mid` and then added to the existing destination element.
#   * imm at least the element width: mid is -1 when ltz(srcElem1) holds,
#     otherwise 0.
#   * smaller imm: plain right shift, followed by a manual sign fill when the
#     source is negative but the shifted value is not.
# The declaration of mid previously carried a stray second semicolon (an
# empty statement); it has been removed.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2914
# vrshr (immediate): rounding right shift. An imm beyond the element width
# gives 0. Otherwise the rounding bit is bit imm - 1 of the source, and the
# shift is split into (imm - 1) followed by 1 so that an imm equal to the
# element width never shifts by the full width in one step. An imm of 0
# copies the source.
2915 vrshrCode = '''
2916 if (imm > sizeof(srcElem1) * 8) {
2917 destElem = 0;
2918 } else if (imm) {
2919 Element rBit = bits(srcElem1, imm - 1);
2920 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2921 } else {
2922 destElem = srcElem1;
2923 }
2924 '''
2925 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2926 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2927
# vrsra (immediate): rounding shift right and accumulate. Mirrors vrshr, but
# every branch adds into the existing destination: nothing for an imm beyond
# the element width (the "+= 0" keeps the three branches parallel), the
# rounded shifted value for a non-zero imm, and the unshifted source for an
# imm of 0.
2928 vrsraCode = '''
2929 if (imm > sizeof(srcElem1) * 8) {
2930 destElem += 0;
2931 } else if (imm) {
2932 Element rBit = bits(srcElem1, imm - 1);
2933 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2934 } else {
2935 destElem += srcElem1;
2936 }
2937 '''
2938 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2939 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2940
2941 vsriCode = '''
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133 IntRegIndex dest, IntRegIndex op1,
134 IntRegIndex op2)
135 {
136 switch (size) {
137 case 1:
138 return new Base<int16_t>(machInst, dest, op1, op2);
139 case 2:
140 return new Base<int32_t>(machInst, dest, op1, op2);
141 default:
142 return new Unknown(machInst);
143 }
144 }
145
146 template <template <typename T> class Base>
147 StaticInstPtr
148 decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149 IntRegIndex dest, IntRegIndex op1,
150 IntRegIndex op2, uint64_t imm)
151 {
152 switch (size) {
153 case 1:
154 return new Base<int16_t>(machInst, dest, op1, op2, imm);
155 case 2:
156 return new Base<int32_t>(machInst, dest, op1, op2, imm);
157 default:
158 return new Unknown(machInst);
159 }
160 }
161
162 template <template <typename T> class Base>
163 StaticInstPtr
164 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (notSigned) {
169 return decodeNeonUThreeUSReg<Base>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<Base>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUThreeSReg(bool q, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (q) {
185 return decodeNeonUThreeUSReg<BaseQ>(
186 size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonUThreeUSReg<BaseD>(
189 size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonSThreeSReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonSThreeUSReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonSThreeUSReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeXReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUSReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUThreeXReg(bool q, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (q) {
233 return decodeNeonUThreeUReg<BaseQ>(
234 size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonUThreeUSReg<BaseD>(
237 size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, IntRegIndex op2)
247 {
248 if (notSigned) {
249 return decodeNeonUThreeSReg<BaseD, BaseQ>(
250 q, size, machInst, dest, op1, op2);
251 } else {
252 return decodeNeonSThreeSReg<BaseD, BaseQ>(
253 q, size, machInst, dest, op1, op2);
254 }
255 }
256
257 template <template <typename T> class BaseD,
258 template <typename T> class BaseQ>
259 StaticInstPtr
260 decodeNeonUThreeReg(bool q, unsigned size,
261 ExtMachInst machInst, IntRegIndex dest,
262 IntRegIndex op1, IntRegIndex op2)
263 {
264 if (q) {
265 return decodeNeonUThreeUReg<BaseQ>(
266 size, machInst, dest, op1, op2);
267 } else {
268 return decodeNeonUThreeUReg<BaseD>(
269 size, machInst, dest, op1, op2);
270 }
271 }
272
273 template <template <typename T> class BaseD,
274 template <typename T> class BaseQ>
275 StaticInstPtr
276 decodeNeonSThreeReg(bool q, unsigned size,
277 ExtMachInst machInst, IntRegIndex dest,
278 IntRegIndex op1, IntRegIndex op2)
279 {
280 if (q) {
281 return decodeNeonSThreeUReg<BaseQ>(
282 size, machInst, dest, op1, op2);
283 } else {
284 return decodeNeonSThreeUReg<BaseD>(
285 size, machInst, dest, op1, op2);
286 }
287 }
288
289 template <template <typename T> class BaseD,
290 template <typename T> class BaseQ>
291 StaticInstPtr
292 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293 ExtMachInst machInst, IntRegIndex dest,
294 IntRegIndex op1, IntRegIndex op2)
295 {
296 if (notSigned) {
297 return decodeNeonUThreeReg<BaseD, BaseQ>(
298 q, size, machInst, dest, op1, op2);
299 } else {
300 return decodeNeonSThreeReg<BaseD, BaseQ>(
301 q, size, machInst, dest, op1, op2);
302 }
303 }
304
305 template <template <typename T> class BaseD,
306 template <typename T> class BaseQ>
307 StaticInstPtr
308 decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310 {
311 if (q) {
312 if (size)
313 return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314 else
315 return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316 } else {
317 if (size)
318 return new Unknown(machInst);
319 else
320 return new BaseD<uint32_t>(machInst, dest, op1, op2);
321 }
322 }
323
324 template <template <typename T> class Base>
325 StaticInstPtr
326 decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327 IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328 {
329 if (size)
330 return new Base<uint64_t>(machInst, dest, op1, op2);
331 else
332 return new Base<uint32_t>(machInst, dest, op1, op2);
333 }
334
335 template <template <typename T> class Base>
336 StaticInstPtr
337 decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338 IntRegIndex dest, IntRegIndex op1,
339 IntRegIndex op2, uint64_t imm)
340 {
341 if (size)
342 return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343 else
344 return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345 }
346
347 template <template <typename T> class BaseD,
348 template <typename T> class BaseQ>
349 StaticInstPtr
350 decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351 IntRegIndex dest, IntRegIndex op1,
352 IntRegIndex op2, uint64_t imm)
353 {
354 if (q) {
355 switch (size) {
356 case 1:
357 return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358 case 2:
359 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360 default:
361 return new Unknown(machInst);
362 }
363 } else {
364 switch (size) {
365 case 1:
366 return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367 case 2:
368 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369 default:
370 return new Unknown(machInst);
371 }
372 }
373 }
374
375 template <template <typename T> class BaseD,
376 template <typename T> class BaseQ>
377 StaticInstPtr
378 decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379 IntRegIndex dest, IntRegIndex op1,
380 IntRegIndex op2, uint64_t imm)
381 {
382 if (q) {
383 switch (size) {
384 case 1:
385 return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386 case 2:
387 return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388 default:
389 return new Unknown(machInst);
390 }
391 } else {
392 switch (size) {
393 case 1:
394 return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395 case 2:
396 return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397 default:
398 return new Unknown(machInst);
399 }
400 }
401 }
402
403 template <template <typename T> class BaseD,
404 template <typename T> class BaseQ>
405 StaticInstPtr
406 decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407 IntRegIndex dest, IntRegIndex op1,
408 IntRegIndex op2, uint64_t imm)
409 {
410 if (q) {
411 if (size)
412 return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413 else
414 return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415 } else {
416 if (size)
417 return new Unknown(machInst);
418 else
419 return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420 }
421 }
422
423 template <template <typename T> class BaseD,
424 template <typename T> class BaseQ>
425 StaticInstPtr
426 decodeNeonUTwoShiftReg(bool q, unsigned size,
427 ExtMachInst machInst, IntRegIndex dest,
428 IntRegIndex op1, uint64_t imm)
429 {
430 if (q) {
431 switch (size) {
432 case 0:
433 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434 case 1:
435 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436 case 2:
437 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438 case 3:
439 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440 default:
441 return new Unknown(machInst);
442 }
443 } else {
444 switch (size) {
445 case 0:
446 return new BaseD<uint8_t>(machInst, dest, op1, imm);
447 case 1:
448 return new BaseD<uint16_t>(machInst, dest, op1, imm);
449 case 2:
450 return new BaseD<uint32_t>(machInst, dest, op1, imm);
451 case 3:
452 return new BaseD<uint64_t>(machInst, dest, op1, imm);
453 default:
454 return new Unknown(machInst);
455 }
456 }
457 }
458
459 template <template <typename T> class BaseD,
460 template <typename T> class BaseQ>
461 StaticInstPtr
462 decodeNeonSTwoShiftReg(bool q, unsigned size,
463 ExtMachInst machInst, IntRegIndex dest,
464 IntRegIndex op1, uint64_t imm)
465 {
466 if (q) {
467 switch (size) {
468 case 0:
469 return new BaseQ<int8_t>(machInst, dest, op1, imm);
470 case 1:
471 return new BaseQ<int16_t>(machInst, dest, op1, imm);
472 case 2:
473 return new BaseQ<int32_t>(machInst, dest, op1, imm);
474 case 3:
475 return new BaseQ<int64_t>(machInst, dest, op1, imm);
476 default:
477 return new Unknown(machInst);
478 }
479 } else {
480 switch (size) {
481 case 0:
482 return new BaseD<int8_t>(machInst, dest, op1, imm);
483 case 1:
484 return new BaseD<int16_t>(machInst, dest, op1, imm);
485 case 2:
486 return new BaseD<int32_t>(machInst, dest, op1, imm);
487 case 3:
488 return new BaseD<int64_t>(machInst, dest, op1, imm);
489 default:
490 return new Unknown(machInst);
491 }
492 }
493 }
494
495
496 template <template <typename T> class BaseD,
497 template <typename T> class BaseQ>
498 StaticInstPtr
499 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500 ExtMachInst machInst, IntRegIndex dest,
501 IntRegIndex op1, uint64_t imm)
502 {
503 if (notSigned) {
504 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505 q, size, machInst, dest, op1, imm);
506 } else {
507 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508 q, size, machInst, dest, op1, imm);
509 }
510 }
511
512 template <template <typename T> class Base>
513 StaticInstPtr
514 decodeNeonUTwoShiftUSReg(unsigned size,
515 ExtMachInst machInst, IntRegIndex dest,
516 IntRegIndex op1, uint64_t imm)
517 {
518 switch (size) {
519 case 0:
520 return new Base<uint8_t>(machInst, dest, op1, imm);
521 case 1:
522 return new Base<uint16_t>(machInst, dest, op1, imm);
523 case 2:
524 return new Base<uint32_t>(machInst, dest, op1, imm);
525 default:
526 return new Unknown(machInst);
527 }
528 }
529
530 template <template <typename T> class Base>
531 StaticInstPtr
532 decodeNeonUTwoShiftUReg(unsigned size,
533 ExtMachInst machInst, IntRegIndex dest,
534 IntRegIndex op1, uint64_t imm)
535 {
536 switch (size) {
537 case 0:
538 return new Base<uint8_t>(machInst, dest, op1, imm);
539 case 1:
540 return new Base<uint16_t>(machInst, dest, op1, imm);
541 case 2:
542 return new Base<uint32_t>(machInst, dest, op1, imm);
543 case 3:
544 return new Base<uint64_t>(machInst, dest, op1, imm);
545 default:
546 return new Unknown(machInst);
547 }
548 }
549
550 template <template <typename T> class Base>
551 StaticInstPtr
552 decodeNeonSTwoShiftUReg(unsigned size,
553 ExtMachInst machInst, IntRegIndex dest,
554 IntRegIndex op1, uint64_t imm)
555 {
556 switch (size) {
557 case 0:
558 return new Base<int8_t>(machInst, dest, op1, imm);
559 case 1:
560 return new Base<int16_t>(machInst, dest, op1, imm);
561 case 2:
562 return new Base<int32_t>(machInst, dest, op1, imm);
563 case 3:
564 return new Base<int64_t>(machInst, dest, op1, imm);
565 default:
566 return new Unknown(machInst);
567 }
568 }
569
570 template <template <typename T> class BaseD,
571 template <typename T> class BaseQ>
572 StaticInstPtr
573 decodeNeonUTwoShiftSReg(bool q, unsigned size,
574 ExtMachInst machInst, IntRegIndex dest,
575 IntRegIndex op1, uint64_t imm)
576 {
577 if (q) {
578 return decodeNeonUTwoShiftUSReg<BaseQ>(
579 size, machInst, dest, op1, imm);
580 } else {
581 return decodeNeonUTwoShiftUSReg<BaseD>(
582 size, machInst, dest, op1, imm);
583 }
584 }
585
586 template <template <typename T> class Base>
587 StaticInstPtr
588 decodeNeonSTwoShiftUSReg(unsigned size,
589 ExtMachInst machInst, IntRegIndex dest,
590 IntRegIndex op1, uint64_t imm)
591 {
592 switch (size) {
593 case 0:
594 return new Base<int8_t>(machInst, dest, op1, imm);
595 case 1:
596 return new Base<int16_t>(machInst, dest, op1, imm);
597 case 2:
598 return new Base<int32_t>(machInst, dest, op1, imm);
599 default:
600 return new Unknown(machInst);
601 }
602 }
603
604 template <template <typename T> class BaseD,
605 template <typename T> class BaseQ>
606 StaticInstPtr
607 decodeNeonSTwoShiftSReg(bool q, unsigned size,
608 ExtMachInst machInst, IntRegIndex dest,
609 IntRegIndex op1, uint64_t imm)
610 {
611 if (q) {
612 return decodeNeonSTwoShiftUSReg<BaseQ>(
613 size, machInst, dest, op1, imm);
614 } else {
615 return decodeNeonSTwoShiftUSReg<BaseD>(
616 size, machInst, dest, op1, imm);
617 }
618 }
619
620 template <template <typename T> class BaseD,
621 template <typename T> class BaseQ>
622 StaticInstPtr
623 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624 ExtMachInst machInst, IntRegIndex dest,
625 IntRegIndex op1, uint64_t imm)
626 {
627 if (notSigned) {
628 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629 q, size, machInst, dest, op1, imm);
630 } else {
631 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632 q, size, machInst, dest, op1, imm);
633 }
634 }
635
636 template <template <typename T> class BaseD,
637 template <typename T> class BaseQ>
638 StaticInstPtr
639 decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641 {
642 if (q) {
643 return decodeNeonUTwoShiftUReg<BaseQ>(
644 size, machInst, dest, op1, imm);
645 } else {
646 return decodeNeonUTwoShiftUSReg<BaseD>(
647 size, machInst, dest, op1, imm);
648 }
649 }
650
651 template <template <typename T> class BaseD,
652 template <typename T> class BaseQ>
653 StaticInstPtr
654 decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656 {
657 if (q) {
658 return decodeNeonSTwoShiftUReg<BaseQ>(
659 size, machInst, dest, op1, imm);
660 } else {
661 return decodeNeonSTwoShiftUSReg<BaseD>(
662 size, machInst, dest, op1, imm);
663 }
664 }
665
666 template <template <typename T> class Base>
667 StaticInstPtr
668 decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670 {
671 if (size)
672 return new Base<uint64_t>(machInst, dest, op1, imm);
673 else
674 return new Base<uint32_t>(machInst, dest, op1, imm);
675 }
676
677 template <template <typename T> class BaseD,
678 template <typename T> class BaseQ>
679 StaticInstPtr
680 decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681 IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682 {
683 if (q) {
684 if (size)
685 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686 else
687 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688 } else {
689 if (size)
690 return new Unknown(machInst);
691 else
692 return new BaseD<uint32_t>(machInst, dest, op1, imm);
693 }
694 }
695
696 template <template <typename T> class Base>
697 StaticInstPtr
698 decodeNeonUTwoMiscUSReg(unsigned size,
699 ExtMachInst machInst, IntRegIndex dest,
700 IntRegIndex op1)
701 {
702 switch (size) {
703 case 0:
704 return new Base<uint8_t>(machInst, dest, op1);
705 case 1:
706 return new Base<uint16_t>(machInst, dest, op1);
707 case 2:
708 return new Base<uint32_t>(machInst, dest, op1);
709 default:
710 return new Unknown(machInst);
711 }
712 }
713
714 template <template <typename T> class Base>
715 StaticInstPtr
716 decodeNeonSTwoMiscUSReg(unsigned size,
717 ExtMachInst machInst, IntRegIndex dest,
718 IntRegIndex op1)
719 {
720 switch (size) {
721 case 0:
722 return new Base<int8_t>(machInst, dest, op1);
723 case 1:
724 return new Base<int16_t>(machInst, dest, op1);
725 case 2:
726 return new Base<int32_t>(machInst, dest, op1);
727 default:
728 return new Unknown(machInst);
729 }
730 }
731
732 template <template <typename T> class BaseD,
733 template <typename T> class BaseQ>
734 StaticInstPtr
735 decodeNeonUTwoMiscSReg(bool q, unsigned size,
736 ExtMachInst machInst, IntRegIndex dest,
737 IntRegIndex op1)
738 {
739 if (q) {
740 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741 } else {
742 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743 }
744 }
745
746 template <template <typename T> class BaseD,
747 template <typename T> class BaseQ>
748 StaticInstPtr
749 decodeNeonSTwoMiscSReg(bool q, unsigned size,
750 ExtMachInst machInst, IntRegIndex dest,
751 IntRegIndex op1)
752 {
753 if (q) {
754 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755 } else {
756 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757 }
758 }
759
760 template <template <typename T> class Base>
761 StaticInstPtr
762 decodeNeonUTwoMiscUReg(unsigned size,
763 ExtMachInst machInst, IntRegIndex dest,
764 IntRegIndex op1)
765 {
766 switch (size) {
767 case 0:
768 return new Base<uint8_t>(machInst, dest, op1);
769 case 1:
770 return new Base<uint16_t>(machInst, dest, op1);
771 case 2:
772 return new Base<uint32_t>(machInst, dest, op1);
773 case 3:
774 return new Base<uint64_t>(machInst, dest, op1);
775 default:
776 return new Unknown(machInst);
777 }
778 }
779
780 template <template <typename T> class Base>
781 StaticInstPtr
782 decodeNeonSTwoMiscUReg(unsigned size,
783 ExtMachInst machInst, IntRegIndex dest,
784 IntRegIndex op1)
785 {
786 switch (size) {
787 case 0:
788 return new Base<int8_t>(machInst, dest, op1);
789 case 1:
790 return new Base<int16_t>(machInst, dest, op1);
791 case 2:
792 return new Base<int32_t>(machInst, dest, op1);
793 case 3:
794 return new Base<int64_t>(machInst, dest, op1);
795 default:
796 return new Unknown(machInst);
797 }
798 }
799
800 template <template <typename T> class BaseD,
801 template <typename T> class BaseQ>
802 StaticInstPtr
803 decodeNeonSTwoMiscReg(bool q, unsigned size,
804 ExtMachInst machInst, IntRegIndex dest,
805 IntRegIndex op1)
806 {
807 if (q) {
808 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809 } else {
810 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811 }
812 }
813
814 template <template <typename T> class BaseD,
815 template <typename T> class BaseQ>
816 StaticInstPtr
817 decodeNeonUTwoMiscReg(bool q, unsigned size,
818 ExtMachInst machInst, IntRegIndex dest,
819 IntRegIndex op1)
820 {
821 if (q) {
822 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823 } else {
824 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825 }
826 }
827
828 template <template <typename T> class BaseD,
829 template <typename T> class BaseQ>
830 StaticInstPtr
831 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832 ExtMachInst machInst, IntRegIndex dest,
833 IntRegIndex op1)
834 {
835 if (notSigned) {
836 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837 q, size, machInst, dest, op1);
838 } else {
839 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840 q, size, machInst, dest, op1);
841 }
842 }
843
844 template <template <typename T> class BaseD,
845 template <typename T> class BaseQ>
846 StaticInstPtr
847 decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848 IntRegIndex dest, IntRegIndex op1)
849 {
850 if (q) {
851 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852 } else {
853 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854 }
855 }
856
857 template <template <typename T> class BaseD,
858 template <typename T> class BaseQ>
859 StaticInstPtr
860 decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861 IntRegIndex dest, IntRegIndex op1)
862 {
863 if (q) {
864 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865 } else {
866 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867 }
868 }
869
870 template <template <typename T> class BaseD,
871 template <typename T> class BaseQ>
872 StaticInstPtr
873 decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874 IntRegIndex dest, IntRegIndex op1)
875 {
876 if (q) {
877 if (size)
878 return new BaseQ<uint64_t>(machInst, dest, op1);
879 else
880 return new BaseQ<uint32_t>(machInst, dest, op1);
881 } else {
882 if (size)
883 return new Unknown(machInst);
884 else
885 return new BaseD<uint32_t>(machInst, dest, op1);
886 }
887 }
888
889 template <template <typename T> class BaseD,
890 template <typename T> class BaseQ>
891 StaticInstPtr
892 decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893 IntRegIndex dest, IntRegIndex op1)
894 {
895 if (size)
896 return new BaseQ<uint64_t>(machInst, dest, op1);
897 else
898 return new BaseD<uint32_t>(machInst, dest, op1);
899 }
900
901 template <template <typename T> class Base>
902 StaticInstPtr
903 decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904 IntRegIndex dest, IntRegIndex op1)
905 {
906 if (size)
907 return new Base<uint64_t>(machInst, dest, op1);
908 else
909 return new Base<uint32_t>(machInst, dest, op1);
910 }
911
912 template <template <typename T> class BaseD,
913 template <typename T> class BaseQ>
914 StaticInstPtr
915 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916 IntRegIndex dest, IntRegIndex op1)
917 {
918 if (q) {
919 switch (size) {
920 case 0x0:
921 return new BaseQ<uint8_t>(machInst, dest, op1);
922 case 0x1:
923 return new BaseQ<uint16_t>(machInst, dest, op1);
924 case 0x2:
925 return new BaseQ<uint32_t>(machInst, dest, op1);
926 default:
927 return new Unknown(machInst);
928 }
929 } else {
930 switch (size) {
931 case 0x0:
932 return new BaseD<uint8_t>(machInst, dest, op1);
933 case 0x1:
934 return new BaseD<uint16_t>(machInst, dest, op1);
935 default:
936 return new Unknown(machInst);
937 }
938 }
939 }
940
941 template <template <typename T> class BaseD,
942 template <typename T> class BaseQ,
943 template <typename T> class BaseBQ>
944 StaticInstPtr
945 decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946 IntRegIndex dest, IntRegIndex op1)
947 {
948 if (q) {
949 switch (size) {
950 case 0x0:
951 return new BaseQ<uint8_t>(machInst, dest, op1);
952 case 0x1:
953 return new BaseQ<uint16_t>(machInst, dest, op1);
954 case 0x2:
955 return new BaseBQ<uint32_t>(machInst, dest, op1);
956 default:
957 return new Unknown(machInst);
958 }
959 } else {
960 switch (size) {
961 case 0x0:
962 return new BaseD<uint8_t>(machInst, dest, op1);
963 case 0x1:
964 return new BaseD<uint16_t>(machInst, dest, op1);
965 default:
966 return new Unknown(machInst);
967 }
968 }
969 }
970
971 template <template <typename T> class BaseD,
972 template <typename T> class BaseQ>
973 StaticInstPtr
974 decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975 IntRegIndex dest, IntRegIndex op1)
976 {
977 if (q) {
978 switch (size) {
979 case 0x0:
980 return new BaseQ<int8_t>(machInst, dest, op1);
981 case 0x1:
982 return new BaseQ<int16_t>(machInst, dest, op1);
983 case 0x2:
984 return new BaseQ<int32_t>(machInst, dest, op1);
985 default:
986 return new Unknown(machInst);
987 }
988 } else {
989 switch (size) {
990 case 0x0:
991 return new BaseD<int8_t>(machInst, dest, op1);
992 case 0x1:
993 return new BaseD<int16_t>(machInst, dest, op1);
994 default:
995 return new Unknown(machInst);
996 }
997 }
998 }
999
1000 template <template <typename T> class BaseD,
1001 template <typename T> class BaseQ,
1002 template <typename T> class BaseBQ>
1003 StaticInstPtr
1004 decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005 IntRegIndex dest, IntRegIndex op1)
1006 {
1007 if (q) {
1008 switch (size) {
1009 case 0x0:
1010 return new BaseQ<uint8_t>(machInst, dest, op1);
1011 case 0x1:
1012 return new BaseQ<uint16_t>(machInst, dest, op1);
1013 case 0x2:
1014 return new BaseBQ<uint32_t>(machInst, dest, op1);
1015 default:
1016 return new Unknown(machInst);
1017 }
1018 } else {
1019 switch (size) {
1020 case 0x0:
1021 return new BaseD<uint8_t>(machInst, dest, op1);
1022 case 0x1:
1023 return new BaseD<uint16_t>(machInst, dest, op1);
1024 default:
1025 return new Unknown(machInst);
1026 }
1027 }
1028 }
1029
1030 template <template <typename T> class BaseD,
1031 template <typename T> class BaseQ,
1032 template <typename T> class BaseBQ>
1033 StaticInstPtr
1034 decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035 IntRegIndex dest, IntRegIndex op1)
1036 {
1037 if (q) {
1038 switch (size) {
1039 case 0x0:
1040 return new BaseQ<int8_t>(machInst, dest, op1);
1041 case 0x1:
1042 return new BaseQ<int16_t>(machInst, dest, op1);
1043 case 0x2:
1044 return new BaseBQ<int32_t>(machInst, dest, op1);
1045 default:
1046 return new Unknown(machInst);
1047 }
1048 } else {
1049 switch (size) {
1050 case 0x0:
1051 return new BaseD<int8_t>(machInst, dest, op1);
1052 case 0x1:
1053 return new BaseD<int16_t>(machInst, dest, op1);
1054 default:
1055 return new Unknown(machInst);
1056 }
1057 }
1058 }
1059}};
1060
1061let {{
# Accumulator for generated declarations; starts empty.
header_output = ""

# C++ helpers for the ordered and equality comparisons. Each returns 2.0
# when its NaN check fires, 0.0 when the comparison holds and 1.0 when it
# does not.
vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''

# "Less than" flavours of the helpers above. This string is defined here
# but is not appended to exec_output in this section.
vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''

# Absolute-value comparisons; same return convention as vcompares.
vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

# Generated definitions start with the two helper groups that are used.
exec_output = "".join((vcompares, vacomparesG))

# Element type name tuples; the "small" variants leave out the 64-bit type.
unsignedTypes = ("uint8_t", "uint16_t", "uint32_t", "uint64_t")
smallUnsignedTypes = unsignedTypes[:3]
signedTypes = ("int8_t", "int16_t", "int32_t", "int64_t")
smallSignedTypes = signedTypes[:3]
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
1133
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate a three-register instruction with equally sized operands.

    Builds the element-walk code: load rCount registers of each source
    (and of the destination when readDest is set), run `op` once per
    element, then write the destination registers back. With pairwise
    set, the two source elements for output i are taken from adjacent
    positions 2*i and 2*i+1 of srcReg1, continuing into srcReg2 once
    2*i reaches eCount. The declaration is appended to header_output,
    and the execute code plus one NeonExecDeclare per entry in `types`
    is appended to exec_output.
    """
    global header_output, exec_output

    loadSrcTmpl = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    '''
    loadDestTmpl = '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    '''
    storeDestTmpl = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    pairwiseWalkTmpl = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    '''
    linearWalkTmpl = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    '''

    # Collect the code fragments in order and join them once at the end.
    pieces = [simdEnabledCheckCode, '''
        RegVect srcReg1, srcReg2, destReg;
    ''']
    for regIdx in range(rCount):
        pieces.append(loadSrcTmpl % { "reg" : regIdx })
        if readDest:
            pieces.append(loadDestTmpl % { "reg" : regIdx })

    readDestCode = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walkTmpl = pairwiseWalkTmpl if pairwise else linearWalkTmpl
    pieces.append(walkTmpl % { "op" : op, "readDest" : readDestCode })

    for regIdx in range(rCount):
        pieces.append(storeDestTmpl % { "reg" : regIdx })
    eWalkCode = ''.join(pieces)

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1194
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Define a floating point NEON instruction with two sources and a
    destination of rCount registers each.

    The generated C++ copies the FpOp1P/FpOp2P registers into FloatReg
    arrays, runs `op` once per register (srcReg1, srcReg2 -> destReg) and
    writes the destination registers back.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destReg from srcReg1/srcReg2.
    readDest   -- also load the destination so `op` may read destReg.
    pairwise   -- feed `op` adjacent register pairs, taken from srcRegs1
                  for the lower half of the results and srcRegs2 after.
    toInt      -- the result is a FloatRegBits value held in a RegVect and
                  written through the _uw destination operands.

    NOTE(review): with readDest and toInt both set the destination read
    indexes the RegVect directly ('destRegs[r]'); confirm no caller
    combines the two flags.
    """
    global header_output, exec_output

    # Everything that depends on the kind of destination is chosen here.
    if toInt:
        destDecl = 'RegVect destRegs;\n'
        destLoadTmpl = '''
            destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
            '''
        destStoreTmpl = '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            '''
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    else:
        destDecl = 'FloatVect destRegs;\n'
        destLoadTmpl = '''
            destRegs[%(reg)d] = FpDestP%(reg)d;
            '''
        destStoreTmpl = '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            '''
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'

    readDestCode = 'destReg = destRegs[r];' if readDest else ''

    if pairwise:
        srcLoads = '''
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];'''
    else:
        srcLoads = '''
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];'''

    chunks = [simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    ''', destDecl]
    for idx in range(rCount):
        chunks.append('''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : idx })
        if readDest:
            chunks.append(destLoadTmpl % { "reg" : idx })
    chunks.append('''
    for (unsigned r = 0; r < rCount; r++) {
        %(srcLoads)s
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "srcLoads" : srcLoads,
            "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest })
    for idx in range(rCount):
        chunks.append(destStoreTmpl % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1279
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Define a three-operand NEON instruction whose operands may differ
    in size.

    Each of bigSrc1, bigSrc2 and bigDest selects, for its operand, between
    a RegVect spanning 2 registers and a BigRegVect spanning 4.  The
    generated C++ loads the sources (and the destination when readDest is
    set), runs `op` once per element and writes the destination back.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.

    NOTE(review): srcElem2 is declared with the prefix of the *first*
    source, so it is a BigElement whenever bigSrc1 is set even if bigSrc2
    is not.  This is kept as found; confirm whether it is intentional.
    """
    global header_output, exec_output

    def operandShape(isBig):
        # (register count, type name prefix) for one operand.
        if isBig:
            return 4, 'Big'
        return 2, ''

    src1Cnt, src1Prefix = operandShape(bigSrc1)
    src2Cnt, src2Prefix = operandShape(bigSrc2)
    destCnt, destPrefix = operandShape(bigDest)

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)]
    for idx in range(src1Cnt):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    for idx in range(src2Cnt):
        chunks.append('''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destCnt):
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix })
    for idx in range(destCnt):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1343
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with two big sources and a normal-sized
    destination; forwards to threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
1347
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with two normal-sized sources and a big
    destination; forwards to threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1351
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with a big first source, a normal-sized
    second source and a big destination; forwards to
    threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
1355
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction combining every element of the first
    source with one element of the second source selected by `imm`.

    The generated C++ loads rCount FpOp1P/FpOp2P registers into RegVect
    temporaries, raises UndefinedInstruction when imm does not index a
    valid element, otherwise runs `op` once per element with
    srcElem2 = srcReg2.elements[imm], and finally copies destReg back to
    the FpDestP registers.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && the condition could never be
    # true and an out-of-range imm would index past srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1404
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Define a NEON instruction combining every element of the first
    source with one element of the second source selected by `imm`,
    producing a BigRegVect result.

    The sources span 2 registers each and the destination spans 4.  The
    generated C++ raises UndefinedInstruction when imm does not index a
    valid element; otherwise it runs `op` once per element with
    srcElem2 = srcReg2.elements[imm] and a BigElement destElem.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && the condition could never be
    # true and an out-of-range imm would index past srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1456
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a floating point NEON instruction combining every register
    of the first source with one register of the second source selected
    by `imm`.

    The generated C++ copies rCount FpOp1P/FpOp2P registers into FloatReg
    arrays, raises UndefinedInstruction when imm is out of range,
    otherwise runs `op` once per register with srcReg2 = srcRegs2[imm],
    and writes destRegs back to the FpDestP registers.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destReg from srcReg1/srcReg2.
    readDest   -- also load the destination so `op` may read destReg.

    NOTE(review): imm is checked against eCount while srcRegs2 holds
    rCount entries; presumably the two are equal for the element types
    used with this generator -- confirm against the callers.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use ||: with && the condition could never be
    # true and an out-of-range imm would index past srcRegs2.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        fault = std::make_shared<UndefinedInstruction>(machInst, false,
                                                      mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1506
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Define a NEON instruction with one source, one destination and an
    immediate, all of rCount registers.

    The generated C++ loads the FpOp1P registers into a RegVect, runs
    `op` once per loop iteration (bound eCount) and writes destRegs back
    to the FpDestP registers.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing the destination value.
    readDest   -- also load the destination before `op` runs.
    toInt      -- `op` produces a FloatRegBits destReg stored through
                  destRegs.regs[i] instead of an Element destElem.
    fromInt    -- `op` consumes a FloatRegBits srcReg1 read from
                  srcRegs1.regs[i] instead of an Element srcElem1.

    NOTE(review): the toInt form of the destination read is assumed to
    apply only when readDest is set.
    """
    global header_output, exec_output

    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    if toInt:
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'

    if not readDest:
        readDestCode = ''
    elif toInt:
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    else:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    ''']
    for idx in range(rCount):
        chunks.append('''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
        destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode })
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1563
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Define a NEON immediate instruction with a BigRegVect source
    (4 registers) and a RegVect destination (2 registers).

    The generated C++ runs `op` once per element, reading a BigElement
    srcElem1 and producing an Element destElem.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(srcRegCount):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destRegCount):
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(destRegCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1607
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Define a NEON immediate instruction with a RegVect source
    (2 registers) and a BigRegVect destination (4 registers).

    The generated C++ runs `op` once per element, reading an Element
    srcElem1 and producing a BigElement destElem.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The previous destination value belongs in the per-element
        # destElem, not in the destReg vector it is read from.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1651
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction with one source and one destination of
    rCount registers each.

    The generated C++ runs `op` once per element.  The result is stored
    at destReg.elements[j], where j starts out equal to the loop index i
    and is declared before `op` so the snippet can redirect the store.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1694
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction whose single source element is selected
    by `imm` and applied to every destination element.

    The generated C++ reads srcReg1.elements[imm] on each iteration, runs
    `op` and stores destElem at destReg.elements[i].  No range check on
    imm is generated here.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1736
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction that rewrites both of its registers.

    The generated C++ always loads rCount FpOp1P and FpDestP registers
    into srcReg1 and destReg, inserts `op` verbatim (no per-element loop
    is generated around it), and then writes back both destReg to FpDestP
    and srcReg1 to FpOp1P.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ code operating directly on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility with the other
                  generators; the destination is always loaded, so the
                  flag has no effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1771
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Define a floating point NEON instruction with one source and one
    destination of rCount registers each.

    The generated C++ copies the FpOp1P registers into a FloatReg array,
    runs `op` once per register (srcReg1 -> destReg) and writes the
    destination registers back.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers making up each operand.
    op         -- C++ snippet computing destReg from srcReg1.
    readDest   -- also load the destination so `op` may read destReg.
    toInt      -- the result is a FloatRegBits value held in a RegVect and
                  written through the _uw destination operands.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The generated loop below iterates with 'r'; indexing with 'i'
        # would reference a variable the loop never declares.
        readDestCode = 'destReg = destRegs[r];'
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1837
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction that combines adjacent source element
    pairs into one BigElement result each.

    The generated C++ iterates eCount / 2 times; iteration i reads source
    elements 2*i and 2*i+1 as srcElem1/srcElem2, runs `op` and stores the
    BigElement destElem at destReg.elements[i].  rCount registers are
    transferred for both the source and the destination.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of registers loaded and written back.
    op         -- C++ snippet computing destElem from srcElem1/srcElem2.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for idx in range(rCount):
        chunks.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
        if readDest:
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1881
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Define a NEON instruction with a BigRegVect source (4 registers)
    and a RegVect destination (2 registers).

    The generated C++ runs `op` once per element, reading a BigElement
    srcElem1 and producing an Element destElem.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for idx in range(srcRegCount):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    if readDest:
        for idx in range(destRegCount):
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(destRegCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1925
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Define a NEON instruction with only a destination register operand
    and an immediate.

    The generated C++ runs `op` once per element to produce destElem and
    writes destReg back to rCount FpDestP registers.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    rCount     -- number of destination registers.
    op         -- C++ snippet computing destElem.
    readDest   -- load the destination first so `op` may read destElem.
    """
    global header_output, exec_output

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    chunks = [simdEnabledCheckCode + '''
    RegVect destReg;
    ''']
    if readDest:
        for idx in range(rCount):
            chunks.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : idx })
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode })
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1963
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Define a NEON instruction with a RegVect source (2 registers) and
    a BigRegVect destination (4 registers).

    The generated C++ runs `op` once per element, reading an Element
    srcElem1 and producing a BigElement destElem.

    name, Name -- mnemonic and generated class name.
    opClass    -- value passed as the "op_class" template parameter.
    types      -- element types; NeonExecDeclare is instantiated for each.
    op         -- C++ snippet computing destElem from srcElem1.
    readDest   -- also load the destination so `op` may read destElem.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The previous destination value belongs in the per-element
        # destElem, not in the destReg vector it is read from.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
2007
# Halving add / rounding halving add / halving subtract.  Each mnemonic is
# registered twice: a "D" class using 2 registers per operand and a "Q"
# class using 4.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + vecSuffix, "SimdAddOp",
                      allTypes, vecRegs, vhaddCode)

vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + vecSuffix, "SimdAddOp",
                      allTypes, vecRegs, vrhaddCode)

vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + vecSuffix, "SimdAddOp",
                      allTypes, vecRegs, vhsubCode)
2045
# Bitwise operations.  Each mnemonic is registered as a "D" class using 2
# registers per operand and a "Q" class using 4, over the unsigned types.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vorrCode)

# vmov reuses the OR snippet under a different mnemonic and op class.
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + vecSuffix, "SimdMiscOp",
                      unsignedTypes, vecRegs, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, veorCode)

# The three bit-select forms read the old destination (readDest=True).
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + vecSuffix, "SimdAluOp",
                      unsignedTypes, vecRegs, vbslCode, True)
2094
# Element-wise maximum and minimum over all integer types, registered as
# a "D" class (2 registers per operand) and a "Q" class (4 registers).
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + vecSuffix, "SimdCmpOp",
                      allTypes, vecRegs, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for vecSuffix, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + vecSuffix, "SimdCmpOp",
                      allTypes, vecRegs, vminCode)
2106
# Integer addition family.

# vadd: plain element-wise sum (wraps at the element width).
2107 vaddCode = '''
2108 destElem = srcElem1 + srcElem2;
2109 '''
2110 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2111 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2112
# vpadd reuses vaddCode with pairwise=True; only a D-sized form is registered
# here.
2113 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2114 2, vaddCode, pairwise=True)
# vaddl / vaddw share one snippet: both operands are cast to BigElement before
# the addition so the sum is computed at the wider type.
2115 vaddlwCode = '''
2116 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2117 '''
2118 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2119 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: add at BigElement width, then shift right by sizeof(Element) * 8
# bits so that only the upper part of the sum is stored.
2120 vaddhnCode = '''
2121 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2122 (sizeof(Element) * 8);
2123 '''
2124 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but 1 << (sizeof(Element) * 8 - 1) is added before the
# shift, i.e. half of the discarded range is added as a rounding constant.
2125 vraddhnCode = '''
2126 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2127 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2128 (sizeof(Element) * 8);
2129 '''
2130 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
# vsub: plain element-wise difference (wraps at the element width).
2132 vsubCode = '''
2133 destElem = srcElem1 - srcElem2;
2134 '''
2135 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2136 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw share one snippet: both operands are cast to BigElement before
# subtracting.
2137 vsublwCode = '''
2138 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2139 '''
2140 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2141 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
# vqadd, unsigned: compute the wrapping sum; if it ended up smaller than
# either operand the addition overflowed, so the result is replaced with
# (Element)(-1) and fpscr.qc is set. FpscrQc is read into a local FPSCR at the
# start and written back at the end.
2143 vqaddUCode = '''
2144 destElem = srcElem1 + srcElem2;
2145 FPSCR fpscr = (FPSCR) FpscrQc;
2146 if (destElem < srcElem1 || destElem < srcElem2) {
2147 destElem = (Element)(-1);
2148 fpscr.qc = 1;
2149 }
2150 FpscrQc = fpscr;
2151 '''
2152 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2153 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: subtract at BigElement width, then shift right by
# sizeof(Element) * 8 bits so that only the upper part is stored.
2154 vsubhnCode = '''
2155 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2156 (sizeof(Element) * 8);
2157 '''
2158 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, with 1 << (sizeof(Element) * 8 - 1) added before the
# shift as a rounding constant.
2159 vrsubhnCode = '''
2160 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2161 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2162 (sizeof(Element) * 8);
2163 '''
2164 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
# vqadd, signed: overflow is detected when both sources have the same sign and
# the wrapped sum has the opposite sign. In that case the result is set to
# 1 << (bits - 1); if the wrapped sum was negative (so the true sum overflowed
# upwards) 1 is subtracted from that value. fpscr.qc is set on saturation.
2166 vqaddSCode = '''
2167 destElem = srcElem1 + srcElem2;
2168 FPSCR fpscr = (FPSCR) FpscrQc;
2169 bool negDest = (destElem < 0);
2170 bool negSrc1 = (srcElem1 < 0);
2171 bool negSrc2 = (srcElem2 < 0);
2172 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2173 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2174 if (negDest)
2175 destElem -= 1;
2176 fpscr.qc = 1;
2177 }
2178 FpscrQc = fpscr;
2179 '''
2180 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2181 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2182
# vqsub, unsigned: compute the wrapping difference; a result larger than
# srcElem1 means the subtraction went below zero, so the result is replaced
# with 0 and fpscr.qc is set.
2183 vqsubUCode = '''
2184 destElem = srcElem1 - srcElem2;
2185 FPSCR fpscr = (FPSCR) FpscrQc;
2186 if (destElem > srcElem1) {
2187 destElem = 0;
2188 fpscr.qc = 1;
2189 }
2190 FpscrQc = fpscr;
2191 '''
2192 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2193 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2194
# vqsub, signed: overflow is detected when the wrapped difference has a
# different sign from srcElem1 and srcElem1's "negative" flag equals
# srcElem2's "non-negative" flag (the operands have opposite signs). The
# saturated value is 1 << (bits - 1), minus 1 when the wrapped result was
# negative; fpscr.qc is set.
2195 vqsubSCode = '''
2196 destElem = srcElem1 - srcElem2;
2197 FPSCR fpscr = (FPSCR) FpscrQc;
2198 bool negDest = (destElem < 0);
2199 bool negSrc1 = (srcElem1 < 0);
2200 bool posSrc2 = (srcElem2 >= 0);
2201 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2202 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2203 if (negDest)
2204 destElem -= 1;
2205 fpscr.qc = 1;
2206 }
2207 FpscrQc = fpscr;
2208 '''
2209 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2210 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2211
# Integer comparisons. Each writes (Element)(-1) when the condition holds and
# 0 otherwise.

# vcgt: srcElem1 > srcElem2.
2212 vcgtCode = '''
2213 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2214 '''
2215 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2216 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2217
# vcge: srcElem1 >= srcElem2.
2218 vcgeCode = '''
2219 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2220 '''
2221 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2222 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2223
# vceq: srcElem1 == srcElem2; registered for unsignedTypes only.
2224 vceqCode = '''
2225 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2226 '''
2227 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2228 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2229
# vshl, register form: the shift amount is srcElem2 cast to int8_t, so it is
# signed. A negative amount shifts right by its magnitude; a magnitude of at
# least the element width yields 0 and clamps shiftAmt to width - 1 so the
# fix-up below stays in range. The fix-up ORs in the upper bits when
# ltz(srcElem1) holds but the shifted value does not test as negative (ltz is
# defined elsewhere; presumably a "less than zero" test -- confirm).
# A non-negative amount shifts left, yielding 0 when it is >= the width.
2230 vshlCode = '''
2231 int16_t shiftAmt = (int8_t)srcElem2;
2232 if (shiftAmt < 0) {
2233 shiftAmt = -shiftAmt;
2234 if (shiftAmt >= sizeof(Element) * 8) {
2235 shiftAmt = sizeof(Element) * 8 - 1;
2236 destElem = 0;
2237 } else {
2238 destElem = (srcElem1 >> shiftAmt);
2239 }
2240 // Make sure the right shift sign extended when it should.
2241 if (ltz(srcElem1) && !ltz(destElem)) {
2242 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2243 1 - shiftAmt));
2244 }
2245 } else {
2246 if (shiftAmt >= sizeof(Element) * 8) {
2247 destElem = 0;
2248 } else {
2249 destElem = srcElem1 << shiftAmt;
2250 }
2251 }
2252 '''
2253 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2254 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2255
# vrshl: like the register-form vshl, plus a rounding bit on right shifts.
# For a negative shift amount, rBit is bit (shiftAmt - 1) of srcElem1 when the
# magnitude is <= the element width, and is forced to 1 when the magnitude
# exceeds the width and ltz(srcElem1) holds. rBit is added after the shifted
# value has been computed and sign-fixed. A zero shift amount copies srcElem1
# unchanged.
2256 vrshlCode = '''
2257 int16_t shiftAmt = (int8_t)srcElem2;
2258 if (shiftAmt < 0) {
2259 shiftAmt = -shiftAmt;
2260 Element rBit = 0;
2261 if (shiftAmt <= sizeof(Element) * 8)
2262 rBit = bits(srcElem1, shiftAmt - 1);
2263 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2264 rBit = 1;
2265 if (shiftAmt >= sizeof(Element) * 8) {
2266 shiftAmt = sizeof(Element) * 8 - 1;
2267 destElem = 0;
2268 } else {
2269 destElem = (srcElem1 >> shiftAmt);
2270 }
2271 // Make sure the right shift sign extended when it should.
2272 if (ltz(srcElem1) && !ltz(destElem)) {
2273 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2274 1 - shiftAmt));
2275 }
2276 destElem += rBit;
2277 } else if (shiftAmt > 0) {
2278 if (shiftAmt >= sizeof(Element) * 8) {
2279 destElem = 0;
2280 } else {
2281 destElem = srcElem1 << shiftAmt;
2282 }
2283 } else {
2284 destElem = srcElem1;
2285 }
2286 '''
2287 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2288 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2289
# vqshl, unsigned, register form. The signed shift amount comes from the
# int8_t cast of srcElem2.
#  - negative: logical right shift by the magnitude (0 when >= the width);
#  - positive: left shift; if the amount is >= the width and the source is
#    non-zero, or if any of the top shiftAmt bits of the source are set, the
#    result is replaced with mask(sizeof(Element) * 8) and fpscr.qc is set;
#  - zero: the source is copied unchanged.
2290 vqshlUCode = '''
2291 int16_t shiftAmt = (int8_t)srcElem2;
2292 FPSCR fpscr = (FPSCR) FpscrQc;
2293 if (shiftAmt < 0) {
2294 shiftAmt = -shiftAmt;
2295 if (shiftAmt >= sizeof(Element) * 8) {
2296 shiftAmt = sizeof(Element) * 8 - 1;
2297 destElem = 0;
2298 } else {
2299 destElem = (srcElem1 >> shiftAmt);
2300 }
2301 } else if (shiftAmt > 0) {
2302 if (shiftAmt >= sizeof(Element) * 8) {
2303 if (srcElem1 != 0) {
2304 destElem = mask(sizeof(Element) * 8);
2305 fpscr.qc = 1;
2306 } else {
2307 destElem = 0;
2308 }
2309 } else {
2310 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2311 sizeof(Element) * 8 - shiftAmt)) {
2312 destElem = mask(sizeof(Element) * 8);
2313 fpscr.qc = 1;
2314 } else {
2315 destElem = srcElem1 << shiftAmt;
2316 }
2317 }
2318 } else {
2319 destElem = srcElem1;
2320 }
2321 FpscrQc = fpscr;
2322 '''
2323 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2324 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2325
# vqshl, signed, register form.
#  - negative shift amount: right shift by the magnitude, with the upper bits
#    ORed in afterwards when the source is negative but the shifted value is
#    not;
#  - positive: left shift. Saturation is flagged when the amount is >= the
#    width and the source is non-zero, or when the top (shiftAmt + 1) bits of
#    the source are not all copies of the sign (all ones for a negative
#    source, all zeros otherwise). On saturation fpscr.qc is set and the
#    result becomes mask(bits - 1), complemented for a negative source;
#  - zero: the source is copied unchanged.
# NOTE(review): these are registered under "SimdCmpOp" whereas the unsigned
# vqshl forms use "SimdAluOp" -- confirm the operation class is intentional.
2326 vqshlSCode = '''
2327 int16_t shiftAmt = (int8_t)srcElem2;
2328 FPSCR fpscr = (FPSCR) FpscrQc;
2329 if (shiftAmt < 0) {
2330 shiftAmt = -shiftAmt;
2331 if (shiftAmt >= sizeof(Element) * 8) {
2332 shiftAmt = sizeof(Element) * 8 - 1;
2333 destElem = 0;
2334 } else {
2335 destElem = (srcElem1 >> shiftAmt);
2336 }
2337 // Make sure the right shift sign extended when it should.
2338 if (srcElem1 < 0 && destElem >= 0) {
2339 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2340 1 - shiftAmt));
2341 }
2342 } else if (shiftAmt > 0) {
2343 bool sat = false;
2344 if (shiftAmt >= sizeof(Element) * 8) {
2345 if (srcElem1 != 0)
2346 sat = true;
2347 else
2348 destElem = 0;
2349 } else {
2350 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2351 sizeof(Element) * 8 - 1 - shiftAmt) !=
2352 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2353 sat = true;
2354 } else {
2355 destElem = srcElem1 << shiftAmt;
2356 }
2357 }
2358 if (sat) {
2359 fpscr.qc = 1;
2360 destElem = mask(sizeof(Element) * 8 - 1);
2361 if (srcElem1 < 0)
2362 destElem = ~destElem;
2363 }
2364 } else {
2365 destElem = srcElem1;
2366 }
2367 FpscrQc = fpscr;
2368 '''
2369 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2370 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2371
# vqrshl, unsigned.
#  - negative shift amount: right shift by the magnitude (0 when >= the
#    width), then add rBit, which is bit (shiftAmt - 1) of the source when the
#    magnitude is <= the width and 0 otherwise;
#  - otherwise (including a zero amount): left shift with saturation to
#    mask(sizeof(Element) * 8) and fpscr.qc set, using the same tests as the
#    unsigned vqshl snippet.
2372 vqrshlUCode = '''
2373 int16_t shiftAmt = (int8_t)srcElem2;
2374 FPSCR fpscr = (FPSCR) FpscrQc;
2375 if (shiftAmt < 0) {
2376 shiftAmt = -shiftAmt;
2377 Element rBit = 0;
2378 if (shiftAmt <= sizeof(Element) * 8)
2379 rBit = bits(srcElem1, shiftAmt - 1);
2380 if (shiftAmt >= sizeof(Element) * 8) {
2381 shiftAmt = sizeof(Element) * 8 - 1;
2382 destElem = 0;
2383 } else {
2384 destElem = (srcElem1 >> shiftAmt);
2385 }
2386 destElem += rBit;
2387 } else {
2388 if (shiftAmt >= sizeof(Element) * 8) {
2389 if (srcElem1 != 0) {
2390 destElem = mask(sizeof(Element) * 8);
2391 fpscr.qc = 1;
2392 } else {
2393 destElem = 0;
2394 }
2395 } else {
2396 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2397 sizeof(Element) * 8 - shiftAmt)) {
2398 destElem = mask(sizeof(Element) * 8);
2399 fpscr.qc = 1;
2400 } else {
2401 destElem = srcElem1 << shiftAmt;
2402 }
2403 }
2404 }
2405 FpscrQc = fpscr;
2406 '''
2407 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2408 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2409
# vqrshl, signed: combines the rounding of vrshl with the saturation of the
# signed vqshl snippet.
#  - negative shift amount: right shift by the magnitude with sign fix-up,
#    then add rBit (bit (shiftAmt - 1) of the source when the magnitude is
#    <= the width; forced to 1 when the magnitude exceeds the width and the
#    source is negative);
#  - positive: left shift; on overflow fpscr.qc is set and the result becomes
#    mask(bits - 1), complemented for a negative source;
#  - zero: the source is copied unchanged.
2410 vqrshlSCode = '''
2411 int16_t shiftAmt = (int8_t)srcElem2;
2412 FPSCR fpscr = (FPSCR) FpscrQc;
2413 if (shiftAmt < 0) {
2414 shiftAmt = -shiftAmt;
2415 Element rBit = 0;
2416 if (shiftAmt <= sizeof(Element) * 8)
2417 rBit = bits(srcElem1, shiftAmt - 1);
2418 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2419 rBit = 1;
2420 if (shiftAmt >= sizeof(Element) * 8) {
2421 shiftAmt = sizeof(Element) * 8 - 1;
2422 destElem = 0;
2423 } else {
2424 destElem = (srcElem1 >> shiftAmt);
2425 }
2426 // Make sure the right shift sign extended when it should.
2427 if (srcElem1 < 0 && destElem >= 0) {
2428 destElem |= -((Element)1 << (sizeof(Element) * 8 -
2429 1 - shiftAmt));
2430 }
2431 destElem += rBit;
2432 } else if (shiftAmt > 0) {
2433 bool sat = false;
2434 if (shiftAmt >= sizeof(Element) * 8) {
2435 if (srcElem1 != 0)
2436 sat = true;
2437 else
2438 destElem = 0;
2439 } else {
2440 if (bits(srcElem1, sizeof(Element) * 8 - 1,
2441 sizeof(Element) * 8 - 1 - shiftAmt) !=
2442 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2443 sat = true;
2444 } else {
2445 destElem = srcElem1 << shiftAmt;
2446 }
2447 }
2448 if (sat) {
2449 fpscr.qc = 1;
2450 destElem = mask(sizeof(Element) * 8 - 1);
2451 if (srcElem1 < 0)
2452 destElem = ~destElem;
2453 }
2454 } else {
2455 destElem = srcElem1;
2456 }
2457 FpscrQc = fpscr;
2458 '''
2459 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2460 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2461
# Absolute-difference family. The difference is always formed as
# (larger - smaller) so it never goes negative.

# vaba: accumulate the absolute difference into the destination.
2462 vabaCode = '''
2463 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2464 (srcElem2 - srcElem1);
2465 '''
2466 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2467 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: as vaba, with both operands cast to BigElement before subtracting.
2468 vabalCode = '''
2469 destElem += (srcElem1 > srcElem2) ?
2470 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2471 ((BigElement)srcElem2 - (BigElement)srcElem1);
2472 '''
2473 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2474
# vabd: absolute difference written to the destination (no accumulation).
2475 vabdCode = '''
2476 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2477 (srcElem2 - srcElem1);
2478 '''
2479 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2480 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: as vabd, with both operands cast to BigElement before subtracting.
2481 vabdlCode = '''
2482 destElem = (srcElem1 > srcElem2) ?
2483 ((BigElement)srcElem1 - (BigElement)srcElem2) :
2484 ((BigElement)srcElem2 - (BigElement)srcElem1);
2485 '''
2486 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2487
# vtst: writes (Element)(-1) when the two sources share at least one set bit
# and 0 otherwise.
2488 vtstCode = '''
2489 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2490 '''
2491 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2492 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2493
# vmul: element-wise product at the element width.
2494 vmulCode = '''
2495 destElem = srcElem1 * srcElem2;
2496 '''
2497 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2498 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: both operands are cast to BigElement before multiplying.
2499 vmullCode = '''
2500 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2501 '''
2502 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2503
# vmla: multiply-accumulate -- the product of the sources is added to the
# existing destination value.
2504 vmlaCode = '''
2505 destElem = destElem + srcElem1 * srcElem2;
2506 '''
2507 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2508 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: as vmla, with both operands cast to BigElement before multiplying.
2509 vmlalCode = '''
2510 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2511 '''
2512 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2513
# vqdmlal: saturating doubling multiply-accumulate (long).
#  1. midElem = 2 * srcElem1 * srcElem2, computed via int64_t casts.
#  2. For the listed maxNeg / halfNeg operand combinations midElem is replaced
#     with ~((BigElement)maxNeg << (sizeof(Element) * 8)) and fpscr.qc is set.
#  3. midElem is added to the destination; if the old destination and midElem
#     had the same sign (per ltz) but the sum does not, the result is
#     saturated to mask(sizeof(BigElement) * 8 - 1), complemented when the old
#     destination was negative, and fpscr.qc is set.
2514 vqdmlalCode = '''
2515 FPSCR fpscr = (FPSCR) FpscrQc;
2516 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2517 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2518 Element halfNeg = maxNeg / 2;
2519 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2520 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2521 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2522 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2523 fpscr.qc = 1;
2524 }
2525 bool negPreDest = ltz(destElem);
2526 destElem += midElem;
2527 bool negDest = ltz(destElem);
2528 bool negMid = ltz(midElem);
2529 if (negPreDest == negMid && negMid != negDest) {
2530 destElem = mask(sizeof(BigElement) * 8 - 1);
2531 if (negPreDest)
2532 destElem = ~destElem;
2533 fpscr.qc = 1;
2534 }
2535 FpscrQc = fpscr;
2536 '''
2537 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2538
# vqdmlsl: same structure as vqdmlal, but midElem is subtracted from the
# destination. The overflow test therefore compares against the sign of the
# negated midElem (posMid = ltz((BigElement)-midElem)).
2539 vqdmlslCode = '''
2540 FPSCR fpscr = (FPSCR) FpscrQc;
2541 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2542 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2543 Element halfNeg = maxNeg / 2;
2544 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2545 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2546 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2547 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2548 fpscr.qc = 1;
2549 }
2550 bool negPreDest = ltz(destElem);
2551 destElem -= midElem;
2552 bool negDest = ltz(destElem);
2553 bool posMid = ltz((BigElement)-midElem);
2554 if (negPreDest == posMid && posMid != negDest) {
2555 destElem = mask(sizeof(BigElement) * 8 - 1);
2556 if (negPreDest)
2557 destElem = ~destElem;
2558 fpscr.qc = 1;
2559 }
2560 FpscrQc = fpscr;
2561 '''
2562 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2563
# vqdmull: doubling multiply (long). The product is 2 * srcElem1 * srcElem2
# via int64_t casts. When both sources equal 1 << (bits - 1) the result is
# replaced with ~((BigElement)srcElem1 << (sizeof(Element) * 8)) and fpscr.qc
# is set.
2564 vqdmullCode = '''
2565 FPSCR fpscr = (FPSCR) FpscrQc;
2566 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2567 if (srcElem1 == srcElem2 &&
2568 srcElem1 == (Element)((Element)1 <<
2569 (Element)(sizeof(Element) * 8 - 1))) {
2570 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2571 fpscr.qc = 1;
2572 }
2573 FpscrQc = fpscr;
2574 '''
2575 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2576
# vmls: multiply-subtract -- the product of the sources is subtracted from the
# existing destination value.
2577 vmlsCode = '''
2578 destElem = destElem - srcElem1 * srcElem2;
2579 '''
2580 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2581 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: as vmls, with both operands cast to BigElement before multiplying.
2582 vmlslCode = '''
2583 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2584 '''
2585 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2586
# Polynomial (carry-less) multiply: for every set bit j of srcElem2, srcElem1
# shifted left by j is XORed into the result. Registered under the "vmul"
# mnemonic with the NVmulp* class names.
2587 vmulpCode = '''
2588 destElem = 0;
2589 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2590 if (bits(srcElem2, j))
2591 destElem ^= srcElem1 << j;
2592 }
2593 '''
2594 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2595 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form of the polynomial multiply: srcElem1 is cast to BigElement before
# shifting so the shifted-out bits are kept.
2596 vmullpCode = '''
2597 destElem = 0;
2598 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2599 if (bits(srcElem2, j))
2600 destElem ^= (BigElement)srcElem1 << j;
2601 }
2602 '''
2603 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2604
# vpmax / vpmin reuse the integer vmaxCode / vminCode snippets with
# pairwise=True; only D-sized forms are registered here.
2605 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2606
2607 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2608
# vqdmulh: doubling multiply returning the high half -- 2 * srcElem1 *
# srcElem2 (via int64_t) shifted right by sizeof(Element) * 8 bits. When both
# sources equal 1 << (bits - 1) the result is replaced with ~srcElem1 and
# fpscr.qc is set.
2609 vqdmulhCode = '''
2610 FPSCR fpscr = (FPSCR) FpscrQc;
2611 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2612 (sizeof(Element) * 8);
2613 if (srcElem1 == srcElem2 &&
2614 srcElem1 == (Element)((Element)1 <<
2615 (sizeof(Element) * 8 - 1))) {
2616 destElem = ~srcElem1;
2617 fpscr.qc = 1;
2618 }
2619 FpscrQc = fpscr;
2620 '''
2621 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2622 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2623
# vqrdmulh: as vqdmulh, with 1 << (sizeof(Element) * 8 - 1) added before the
# shift as a rounding constant. For the listed maxNeg / halfNeg operand
# combinations the result is overridden (mask(bits - 1) when the computed
# value is negative, 1 << (bits - 1) otherwise) and fpscr.qc is set.
2624 vqrdmulhCode = '''
2625 FPSCR fpscr = (FPSCR) FpscrQc;
2626 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2627 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2628 (sizeof(Element) * 8);
2629 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2630 Element halfNeg = maxNeg / 2;
2631 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2632 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2633 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2634 if (destElem < 0) {
2635 destElem = mask(sizeof(Element) * 8 - 1);
2636 } else {
2637 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2638 }
2639 fpscr.qc = 1;
2640 }
2641 FpscrQc = fpscr;
2642 '''
2643 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2644 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2645 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2646 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2647
# Floating-point maximum. processNans() is called first and reports through
# `done` whether it already produced the result; if not, the result comes from
# binaryOp() with fpMax<float>. When processNans() did finish and
# flushToZero(srcReg1, srcReg2) returns true, fpscr.idc is set. The FPSCR
# value is read from and written back to FpscrExc.
2648 vmaxfpCode = '''
2649 FPSCR fpscr = (FPSCR) FpscrExc;
2650 bool done;
2651 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2652 if (!done) {
2653 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2654 true, true, VfpRoundNearest);
2655 } else if (flushToZero(srcReg1, srcReg2)) {
2656 fpscr.idc = 1;
2657 }
2658 FpscrExc = fpscr;
2659 '''
2660 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2661 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2662
# Floating-point minimum: identical structure to vmaxfpCode, using
# fpMin<float>.
2663 vminfpCode = '''
2664 FPSCR fpscr = (FPSCR) FpscrExc;
2665 bool done;
2666 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2667 if (!done) {
2668 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2669 true, true, VfpRoundNearest);
2670 } else if (flushToZero(srcReg1, srcReg2)) {
2671 fpscr.idc = 1;
2672 }
2673 FpscrExc = fpscr;
2674 '''
2675 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2676 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2677
# Pairwise floating-point max / min reuse the snippets above with
# pairwise=True.
2678 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2679 2, vmaxfpCode, pairwise=True)
2680 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2681 4, vmaxfpCode, pairwise=True)
2682
2683 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2684 2, vminfpCode, pairwise=True)
2685 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2686 4, vminfpCode, pairwise=True)
2687
# Floating-point add / subtract / multiply. Each snippet reads FpscrExc into a
# local FPSCR, delegates to binaryOp() with the named helper (fpAddS, fpSubS,
# fpMulS) and VfpRoundNearest, then writes the FPSCR value back.

# vadd (float).
2688 vaddfpCode = '''
2689 FPSCR fpscr = (FPSCR) FpscrExc;
2690 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2691 true, true, VfpRoundNearest);
2692 FpscrExc = fpscr;
2693 '''
2694 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2695 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2696
# vpadd (float) reuses vaddfpCode with pairwise=True.
2697 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2698 2, vaddfpCode, pairwise=True)
2699 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2700 4, vaddfpCode, pairwise=True)
2701
# vsub (float).
2702 vsubfpCode = '''
2703 FPSCR fpscr = (FPSCR) FpscrExc;
2704 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2705 true, true, VfpRoundNearest);
2706 FpscrExc = fpscr;
2707 '''
2708 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2709 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2710
# vmul (float).
2711 vmulfpCode = '''
2712 FPSCR fpscr = (FPSCR) FpscrExc;
2713 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2714 true, true, VfpRoundNearest);
2715 FpscrExc = fpscr;
2716 '''
2717 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2718 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2719
# Floating-point multiply-accumulate family. All four read the previous
# destination value (destReg) inside the snippet.

# vmla (float): two separate binaryOp() steps -- a multiply (fpMulS) into
# `mid`, then an add (fpAddS) of `mid` and the destination.
2720 vmlafpCode = '''
2721 FPSCR fpscr = (FPSCR) FpscrExc;
2722 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2723 true, true, VfpRoundNearest);
2724 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2725 true, true, VfpRoundNearest);
2726 FpscrExc = fpscr;
2727 '''
2728 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2729 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2730
# vfma: a single ternaryOp() call with fpMulAdd<float> on
# (srcReg1, srcReg2, destReg).
2731 vfmafpCode = '''
2732 FPSCR fpscr = (FPSCR) FpscrExc;
2733 destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2734 true, true, VfpRoundNearest);
2735 FpscrExc = fpscr;
2736 '''
2737 threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2738 threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2739
# vfms: as vfma, with the first source negated before the call.
2740 vfmsfpCode = '''
2741 FPSCR fpscr = (FPSCR) FpscrExc;
2742 destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2743 true, true, VfpRoundNearest);
2744 FpscrExc = fpscr;
2745 '''
2746 threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2747 threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2748
# vmls (float): a multiply (fpMulS) into `mid`, then a subtract (fpSubS) of
# `mid` from the destination.
2749 vmlsfpCode = '''
2750 FPSCR fpscr = (FPSCR) FpscrExc;
2751 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2752 true, true, VfpRoundNearest);
2753 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2754 true, true, VfpRoundNearest);
2755 FpscrExc = fpscr;
2756 '''
2757 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2758 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2759
# Floating-point comparisons. Every snippet has the same shape: binaryOp() is
# called with a comparison helper (vcgtFunc, vcgeFunc, vacgtFunc, vacgeFunc,
# vceqFunc -- all defined elsewhere) and its float result `res` is decoded:
#   res == 0   -> destReg = -1 (otherwise destReg = 0)
#   res == 2.0 -> fpscr.ioc is set
# All are registered with toInt = True.

# vcgt (float).
2760 vcgtfpCode = '''
2761 FPSCR fpscr = (FPSCR) FpscrExc;
2762 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2763 true, true, VfpRoundNearest);
2764 destReg = (res == 0) ? -1 : 0;
2765 if (res == 2.0)
2766 fpscr.ioc = 1;
2767 FpscrExc = fpscr;
2768 '''
2769 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2770 2, vcgtfpCode, toInt = True)
2771 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2772 4, vcgtfpCode, toInt = True)
2773
# vcge (float).
2774 vcgefpCode = '''
2775 FPSCR fpscr = (FPSCR) FpscrExc;
2776 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2777 true, true, VfpRoundNearest);
2778 destReg = (res == 0) ? -1 : 0;
2779 if (res == 2.0)
2780 fpscr.ioc = 1;
2781 FpscrExc = fpscr;
2782 '''
2783 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2784 2, vcgefpCode, toInt = True)
2785 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2786 4, vcgefpCode, toInt = True)
2787
# vacgt (float).
2788 vacgtfpCode = '''
2789 FPSCR fpscr = (FPSCR) FpscrExc;
2790 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2791 true, true, VfpRoundNearest);
2792 destReg = (res == 0) ? -1 : 0;
2793 if (res == 2.0)
2794 fpscr.ioc = 1;
2795 FpscrExc = fpscr;
2796 '''
2797 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2798 2, vacgtfpCode, toInt = True)
2799 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2800 4, vacgtfpCode, toInt = True)
2801
# vacge (float).
2802 vacgefpCode = '''
2803 FPSCR fpscr = (FPSCR) FpscrExc;
2804 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2805 true, true, VfpRoundNearest);
2806 destReg = (res == 0) ? -1 : 0;
2807 if (res == 2.0)
2808 fpscr.ioc = 1;
2809 FpscrExc = fpscr;
2810 '''
2811 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2812 2, vacgefpCode, toInt = True)
2813 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2814 4, vacgefpCode, toInt = True)
2815
# vceq (float).
2816 vceqfpCode = '''
2817 FPSCR fpscr = (FPSCR) FpscrExc;
2818 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2819 true, true, VfpRoundNearest);
2820 destReg = (res == 0) ? -1 : 0;
2821 if (res == 2.0)
2822 fpscr.ioc = 1;
2823 FpscrExc = fpscr;
2824 '''
2825 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2826 2, vceqfpCode, toInt = True)
2827 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2828 4, vceqfpCode, toInt = True)
2829
# vrecps: delegates entirely to binaryOp() with the fpRecpsS helper (defined
# elsewhere).
2830 vrecpsCode = '''
2831 FPSCR fpscr = (FPSCR) FpscrExc;
2832 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2833 true, true, VfpRoundNearest);
2834 FpscrExc = fpscr;
2835 '''
2836 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2837 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2838
# vrsqrts: delegates entirely to binaryOp() with the fpRSqrtsS helper (defined
# elsewhere).
2839 vrsqrtsCode = '''
2840 FPSCR fpscr = (FPSCR) FpscrExc;
2841 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2842 true, true, VfpRoundNearest);
2843 FpscrExc = fpscr;
2844 '''
2845 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2846 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2847
# vabd (float): subtract through binaryOp()/fpSubS, then take fabs() of the
# difference.
2848 vabdfpCode = '''
2849 FPSCR fpscr = (FPSCR) FpscrExc;
2850 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2851 true, true, VfpRoundNearest);
2852 destReg = fabs(mid);
2853 FpscrExc = fpscr;
2854 '''
2855 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2856 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2857
# Variants registered through the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst helpers. They reuse the element snippets assigned earlier
# (vmlaCode, vmlsCode, vmulCode, ...) under class names carrying an extra "s"
# (presumably the by-scalar encodings -- confirm against the helper
# definitions).

# vmla / vmlal.
# NOTE(review): the integer vmla forms are registered with unsignedTypes,
# while the vmls and vmul forms below use allTypes -- confirm this difference
# is intentional.
2858 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2859 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2860 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2861 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2862 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2863
# vmls / vmlsl.
2864 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2865 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2866 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2867 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2868 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2869
# vmul / vmull.
2870 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2871 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2872 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2873 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2874 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2875
# Saturating doubling multiplies: vqdmull, vqdmlal, vqdmlsl, vqdmulh,
# vqrdmulh.
2876 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2877 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2878 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2879 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2880 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2881 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2882 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2883 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2884 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2885
# vshr, immediate form: when imm is at least the element width the result is
# -1 if ltz(srcElem1) holds and 0 otherwise; for smaller immediates it is
# srcElem1 >> imm.
2886 vshrCode = '''
2887 if (imm >= sizeof(srcElem1) * 8) {
2888 if (ltz(srcElem1))
2889 destElem = -1;
2890 else
2891 destElem = 0;
2892 } else {
2893 destElem = srcElem1 >> imm;
2894 }
2895 '''
2896 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2897 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2898
2899 vsraCode = '''
2900 Element mid;;
2901 if (imm >= sizeof(srcElem1) * 8) {
2902 mid = ltz(srcElem1) ? -1 : 0;
2903 } else {
2904 mid = srcElem1 >> imm;
2905 if (ltz(srcElem1) && !ltz(mid)) {
2906 mid |= -(mid & ((Element)1 <<
2907 (sizeof(Element) * 8 - 1 - imm)));
2908 }
2909 }
2910 destElem += mid;
2911 '''
2912 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2913 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2914
# vrshr: rounding shift right by an immediate.
#  - imm greater than the element width: result is 0;
#  - non-zero imm: rBit is bit (imm - 1) of the source; the shift is done in
#    two steps ((imm - 1), then 1) and rBit is added;
#  - imm == 0: the source is copied unchanged.
2915 vrshrCode = '''
2916 if (imm > sizeof(srcElem1) * 8) {
2917 destElem = 0;
2918 } else if (imm) {
2919 Element rBit = bits(srcElem1, imm - 1);
2920 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2921 } else {
2922 destElem = srcElem1;
2923 }
2924 '''
2925 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2926 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2927
# vrsra: same computation as vrshr, but the value is accumulated into the
# destination (+=) instead of overwriting it; the out-of-range case adds 0.
2928 vrsraCode = '''
2929 if (imm > sizeof(srcElem1) * 8) {
2930 destElem += 0;
2931 } else if (imm) {
2932 Element rBit = bits(srcElem1, imm - 1);
2933 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2934 } else {
2935 destElem += srcElem1;
2936 }
2937 '''
2938 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2939 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2940
# VSRI / VSHL / VSLI immediate-shift snippets.
#
# Each string below is the per-element C++ body handed to twoRegShiftInst,
# once for the "D" class (count 2) and once for the "Q" class (count 4).
# The trailing True on vsri/vsli is passed only for the snippets that read
# destElem as an input -- presumably a "read destination" flag; confirm
# against the twoRegShiftInst definition.
#
# NOTE(review): mask(n) is assumed to yield the low n bits set; it is
# defined outside this section.

# Shift right and insert: a shift of a full element width (or more) leaves
# the destination untouched; otherwise the shifted source is OR-ed with the
# destination bits selected by ~mask(width - imm).
vsriCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = destElem;
    } else {
        destElem = (srcElem1 >> imm) |
                   (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)

# Shift left: an over-wide shift is performed as (width - 1) followed by 1
# so that no single shift count reaches the element width.
vshlCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    } else {
        destElem = srcElem1 << imm;
    }
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)

# Shift left and insert: a shift of a full element width (or more) leaves
# the destination untouched; otherwise the shifted source is OR-ed with the
# destination bits selected by mask(imm).
vsliCode = '''
    if (imm >= sizeof(Element) * 8) {
        destElem = destElem;
    } else {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2968
2969 vqshlCode = '''
2970 FPSCR fpscr = (FPSCR) FpscrQc;
2971 if (imm >= sizeof(Element) * 8) {
2972 if (srcElem1 != 0) {
2973 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2974 if (srcElem1 > 0)
2975 destElem = ~destElem;
2976 fpscr.qc = 1;
2977 } else {
2978 destElem = 0;
2979 }
2980 } else if (imm) {
2981 destElem = (srcElem1 << imm);
2982 uint64_t topBits = bits((uint64_t)srcElem1,
2983 sizeof(Element) * 8 - 1,
2984 sizeof(Element) * 8 - 1 - imm);
2985 if (topBits != 0 && topBits != mask(imm + 1)) {
2986 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2987 if (srcElem1 > 0)
2988 destElem = ~destElem;
2989 fpscr.qc = 1;
2990 }
2991 } else {
2992 destElem = srcElem1;
2993 }
2994 FpscrQc = fpscr;
2995 '''
2996 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2997 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2998
2999 vqshluCode = '''
3000 FPSCR fpscr = (FPSCR) FpscrQc;
3001 if (imm >= sizeof(Element) * 8) {
3002 if (srcElem1 != 0) {
3003 destElem = mask(sizeof(Element) * 8);
3004 fpscr.qc = 1;
3005 } else {
3006 destElem = 0;
3007 }
3008 } else if (imm) {
3009 destElem = (srcElem1 << imm);
3010 uint64_t topBits = bits((uint64_t)srcElem1,
3011 sizeof(Element) * 8 - 1,
3012 sizeof(Element) * 8 - imm);
3013 if (topBits != 0) {
3014 destElem = mask(sizeof(Element) * 8);
3015 fpscr.qc = 1;
3016 }
3017 } else {
3018 destElem = srcElem1;
3019 }
3020 FpscrQc = fpscr;
3021 '''
3022 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3023 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3024
3025 vqshlusCode = '''
3026 FPSCR fpscr = (FPSCR) FpscrQc;
3027 if (imm >= sizeof(Element) * 8) {
3028 if (srcElem1 < 0) {
3029 destElem = 0;
3030 fpscr.qc = 1;
3031 } else if (srcElem1 > 0) {
3032 destElem = mask(sizeof(Element) * 8);
3033 fpscr.qc = 1;
3034 } else {
3035 destElem = 0;
3036 }
3037 } else if (imm) {
3038 destElem = (srcElem1 << imm);
3039 uint64_t topBits = bits((uint64_t)srcElem1,
3040 sizeof(Element) * 8 - 1,
3041 sizeof(Element) * 8 - imm);
3042 if (srcElem1 < 0) {
3043 destElem = 0;
3044 fpscr.qc = 1;
3045 } else if (topBits != 0) {
3046 destElem = mask(sizeof(Element) * 8);
3047 fpscr.qc = 1;
3048 }
3049 } else {
3050 if (srcElem1 < 0) {
3051 fpscr.qc = 1;
3052 destElem = 0;
3053 } else {
3054 destElem = srcElem1;
3055 }
3056 }
3057 FpscrQc = fpscr;
3058 '''
3059 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3060 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3061
3062 vshrnCode = '''
3063 if (imm >= sizeof(srcElem1) * 8) {
3064 destElem = 0;
3065 } else {
3066 destElem = srcElem1 >> imm;
3067 }
3068 '''
3069 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3070
3071 vrshrnCode = '''
3072 if (imm > sizeof(srcElem1) * 8) {
3073 destElem = 0;
3074 } else if (imm) {
3075 Element rBit = bits(srcElem1, imm - 1);
3076 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3077 } else {
3078 destElem = srcElem1;
3079 }
3080 '''
3081 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3082
3083 vqshrnCode = '''
3084 FPSCR fpscr = (FPSCR) FpscrQc;
3085 if (imm > sizeof(srcElem1) * 8) {
3086 if (srcElem1 != 0 && srcElem1 != -1)
3087 fpscr.qc = 1;
3088 destElem = 0;
3089 } else if (imm) {
3090 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3091 mid |= -(mid & ((BigElement)1 <<
3092 (sizeof(BigElement) * 8 - 1 - imm)));
3093 if (mid != (Element)mid) {
3094 destElem = mask(sizeof(Element) * 8 - 1);
3095 if (srcElem1 < 0)
3096 destElem = ~destElem;
3097 fpscr.qc = 1;
3098 } else {
3099 destElem = mid;
3100 }
3101 } else {
3102 destElem = srcElem1;
3103 }
3104 FpscrQc = fpscr;
3105 '''
3106 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3107
3108 vqshrunCode = '''
3109 FPSCR fpscr = (FPSCR) FpscrQc;
3110 if (imm > sizeof(srcElem1) * 8) {
3111 if (srcElem1 != 0)
3112 fpscr.qc = 1;
3113 destElem = 0;
3114 } else if (imm) {
3115 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3116 if (mid != (Element)mid) {
3117 destElem = mask(sizeof(Element) * 8);
3118 fpscr.qc = 1;
3119 } else {
3120 destElem = mid;
3121 }
3122 } else {
3123 destElem = srcElem1;
3124 }
3125 FpscrQc = fpscr;
3126 '''
3127 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3128 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3129
3130 vqshrunsCode = '''
3131 FPSCR fpscr = (FPSCR) FpscrQc;
3132 if (imm > sizeof(srcElem1) * 8) {
3133 if (srcElem1 != 0)
3134 fpscr.qc = 1;
3135 destElem = 0;
3136 } else if (imm) {
3137 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3138 if (bits(mid, sizeof(BigElement) * 8 - 1,
3139 sizeof(Element) * 8) != 0) {
3140 if (srcElem1 < 0) {
3141 destElem = 0;
3142 } else {
3143 destElem = mask(sizeof(Element) * 8);
3144 }
3145 fpscr.qc = 1;
3146 } else {
3147 destElem = mid;
3148 }
3149 } else {
3150 destElem = srcElem1;
3151 }
3152 FpscrQc = fpscr;
3153 '''
3154 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3155 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3156
3157 vqrshrnCode = '''
3158 FPSCR fpscr = (FPSCR) FpscrQc;
3159 if (imm > sizeof(srcElem1) * 8) {
3160 if (srcElem1 != 0 && srcElem1 != -1)
3161 fpscr.qc = 1;
3162 destElem = 0;
3163 } else if (imm) {
3164 BigElement mid = (srcElem1 >> (imm - 1));
3165 uint64_t rBit = mid & 0x1;
3166 mid >>= 1;
3167 mid |= -(mid & ((BigElement)1 <<
3168 (sizeof(BigElement) * 8 - 1 - imm)));
3169 mid += rBit;
3170 if (mid != (Element)mid) {
3171 destElem = mask(sizeof(Element) * 8 - 1);
3172 if (srcElem1 < 0)
3173 destElem = ~destElem;
3174 fpscr.qc = 1;
3175 } else {
3176 destElem = mid;
3177 }
3178 } else {
3179 if (srcElem1 != (Element)srcElem1) {
3180 destElem = mask(sizeof(Element) * 8 - 1);
3181 if (srcElem1 < 0)
3182 destElem = ~destElem;
3183 fpscr.qc = 1;
3184 } else {
3185 destElem = srcElem1;
3186 }
3187 }
3188 FpscrQc = fpscr;
3189 '''
3190 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3191 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3192
# Saturating rounding shift-right-narrow for the unsigned element types.
#
# Per element:
#   * imm greater than the source width: result is 0, and the saturation
#     flag (qc) is raised when the source was non-zero.
#   * imm non-zero: the bit just below the cut (rBit) is added back after
#     the shift to round; if the rounded value does not survive the cast to
#     the narrow Element type, the result saturates and qc is raised.
#   * imm zero: plain narrowing with the same saturation check.
#
# Both overflow paths saturate to mask(sizeof(Element) * 8). The imm == 0
# path previously used mask(sizeof(Element) * 8 - 1), which disagreed with
# the imm != 0 path of this same snippet.
# NOTE(review): mask(n) is assumed to yield the low n bits set -- confirm.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3222
3223 vqrshrunsCode = '''
3224 FPSCR fpscr = (FPSCR) FpscrQc;
3225 if (imm > sizeof(srcElem1) * 8) {
3226 if (srcElem1 != 0)
3227 fpscr.qc = 1;
3228 destElem = 0;
3229 } else if (imm) {
3230 BigElement mid = (srcElem1 >> (imm - 1));
3231 uint64_t rBit = mid & 0x1;
3232 mid >>= 1;
3233 mid |= -(mid & ((BigElement)1 <<
3234 (sizeof(BigElement) * 8 - 1 - imm)));
3235 mid += rBit;
3236 if (bits(mid, sizeof(BigElement) * 8 - 1,
3237 sizeof(Element) * 8) != 0) {
3238 if (srcElem1 < 0) {
3239 destElem = 0;
3240 } else {
3241 destElem = mask(sizeof(Element) * 8);
3242 }
3243 fpscr.qc = 1;
3244 } else {
3245 destElem = mid;
3246 }
3247 } else {
3248 if (srcElem1 < 0) {
3249 fpscr.qc = 1;
3250 destElem = 0;
3251 } else {
3252 destElem = srcElem1;
3253 }
3254 }
3255 FpscrQc = fpscr;
3256 '''
3257 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3258 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3259
3260 vshllCode = '''
3261 if (imm >= sizeof(destElem) * 8) {
3262 destElem = 0;
3263 } else {
3264 destElem = (BigElement)srcElem1 << imm;
3265 }
3266 '''
3267 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3268
3269 vmovlCode = '''
3270 destElem = srcElem1;
3271 '''
3272 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3273
3274 vcvt2ufxCode = '''
3275 FPSCR fpscr = (FPSCR) FpscrExc;
3276 if (flushToZero(srcElem1))
3277 fpscr.idc = 1;
3278 VfpSavedState state = prepFpState(VfpRoundNearest);
3279 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3280 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3281 __asm__ __volatile__("" :: "m" (destReg));
3282 finishVfp(fpscr, state, true);
3283 FpscrExc = fpscr;
3284 '''
3285 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3286 2, vcvt2ufxCode, toInt = True)
3287 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3288 4, vcvt2ufxCode, toInt = True)
3289
3290 vcvt2sfxCode = '''
3291 FPSCR fpscr = (FPSCR) FpscrExc;
3292 if (flushToZero(srcElem1))
3293 fpscr.idc = 1;
3294 VfpSavedState state = prepFpState(VfpRoundNearest);
3295 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3296 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3297 __asm__ __volatile__("" :: "m" (destReg));
3298 finishVfp(fpscr, state, true);
3299 FpscrExc = fpscr;
3300 '''
3301 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3302 2, vcvt2sfxCode, toInt = True)
3303 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3304 4, vcvt2sfxCode, toInt = True)
3305
3306 vcvtu2fpCode = '''
3307 FPSCR fpscr = (FPSCR) FpscrExc;
3308 VfpSavedState state = prepFpState(VfpRoundNearest);
3309 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3310 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3311 __asm__ __volatile__("" :: "m" (destElem));
3312 finishVfp(fpscr, state, true);
3313 FpscrExc = fpscr;
3314 '''
3315 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3316 2, vcvtu2fpCode, fromInt = True)
3317 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3318 4, vcvtu2fpCode, fromInt = True)
3319
3320 vcvts2fpCode = '''
3321 FPSCR fpscr = (FPSCR) FpscrExc;
3322 VfpSavedState state = prepFpState(VfpRoundNearest);
3323 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3324 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3325 __asm__ __volatile__("" :: "m" (destElem));
3326 finishVfp(fpscr, state, true);
3327 FpscrExc = fpscr;
3328 '''
3329 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3330 2, vcvts2fpCode, fromInt = True)
3331 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3332 4, vcvts2fpCode, fromInt = True)
3333
3334 vcvts2hCode = '''
3335 destElem = 0;
3336 FPSCR fpscr = (FPSCR) FpscrExc;
3337 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3338 if (flushToZero(srcFp1))
3339 fpscr.idc = 1;
3340 VfpSavedState state = prepFpState(VfpRoundNearest);
3341 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3342 : "m" (srcFp1), "m" (destElem));
3343 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3344 fpscr.ahp, srcFp1);
3345 __asm__ __volatile__("" :: "m" (destElem));
3346 finishVfp(fpscr, state, true);
3347 FpscrExc = fpscr;
3348 '''
3349 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3350
3351 vcvth2sCode = '''
3352 destElem = 0;
3353 FPSCR fpscr = (FPSCR) FpscrExc;
3354 VfpSavedState state = prepFpState(VfpRoundNearest);
3355 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3356 : "m" (srcElem1), "m" (destElem));
3357 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3358 __asm__ __volatile__("" :: "m" (destElem));
3359 finishVfp(fpscr, state, true);
3360 FpscrExc = fpscr;
3361 '''
3362 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3363
3364 vrsqrteCode = '''
3365 destElem = unsignedRSqrtEstimate(srcElem1);
3366 '''
3367 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3368 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3369
3370 vrsqrtefpCode = '''
3371 FPSCR fpscr = (FPSCR) FpscrExc;
3372 if (flushToZero(srcReg1))
3373 fpscr.idc = 1;
3374 destReg = fprSqrtEstimate(fpscr, srcReg1);
3375 FpscrExc = fpscr;
3376 '''
3377 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3378 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3379
3380 vrecpeCode = '''
3381 destElem = unsignedRecipEstimate(srcElem1);
3382 '''
3383 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3384 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3385
3386 vrecpefpCode = '''
3387 FPSCR fpscr = (FPSCR) FpscrExc;
3388 if (flushToZero(srcReg1))
3389 fpscr.idc = 1;
3390 destReg = fpRecipEstimate(fpscr, srcReg1);
3391 FpscrExc = fpscr;
3392 '''
3393 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3394 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3395
3396 vrev16Code = '''
3397 destElem = srcElem1;
3398 unsigned groupSize = ((1 << 1) / sizeof(Element));
3399 unsigned reverseMask = (groupSize - 1);
3400 j = i ^ reverseMask;
3401 '''
3402 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3403 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3404 vrev32Code = '''
3405 destElem = srcElem1;
3406 unsigned groupSize = ((1 << 2) / sizeof(Element));
3407 unsigned reverseMask = (groupSize - 1);
3408 j = i ^ reverseMask;
3409 '''
3410 twoRegMiscInst("vrev32", "NVrev32D",
3411 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3412 twoRegMiscInst("vrev32", "NVrev32Q",
3413 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3414 vrev64Code = '''
3415 destElem = srcElem1;
3416 unsigned groupSize = ((1 << 3) / sizeof(Element));
3417 unsigned reverseMask = (groupSize - 1);
3418 j = i ^ reverseMask;
3419 '''
3420 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3421 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3422
3423 split('exec')
3424 exec_output += vcompares + vcomparesL
3425
3426 vpaddlCode = '''
3427 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3428 '''
3429 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3430 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3431
3432 vpadalCode = '''
3433 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3434 '''
3435 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3436 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3437
3438 vclsCode = '''
3439 unsigned count = 0;
3440 if (srcElem1 < 0) {
3441 srcElem1 <<= 1;
3442 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3443 count++;
3444 srcElem1 <<= 1;
3445 }
3446 } else {
3447 srcElem1 <<= 1;
3448 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3449 count++;
3450 srcElem1 <<= 1;
3451 }
3452 }
3453 destElem = count;
3454 '''
3455 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3456 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3457
3458 vclzCode = '''
3459 unsigned count = 0;
3460 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3461 count++;
3462 srcElem1 <<= 1;
3463 }
3464 destElem = count;
3465 '''
3466 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3467 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3468
3469 vcntCode = '''
3470 unsigned count = 0;
3471 while (srcElem1 && count < sizeof(Element) * 8) {
3472 count += srcElem1 & 0x1;
3473 srcElem1 >>= 1;
3474 }
3475 destElem = count;
3476 '''
3477
3478 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3479 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3480
3481 vmvnCode = '''
3482 destElem = ~srcElem1;
3483 '''
3484 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3485 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3486
3487 vqabsCode = '''
3488 FPSCR fpscr = (FPSCR) FpscrQc;
3489 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3490 fpscr.qc = 1;
3491 destElem = ~srcElem1;
3492 } else if (srcElem1 < 0) {
3493 destElem = -srcElem1;
3494 } else {
3495 destElem = srcElem1;
3496 }
3497 FpscrQc = fpscr;
3498 '''
3499 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3500 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3501
3502 vqnegCode = '''
3503 FPSCR fpscr = (FPSCR) FpscrQc;
3504 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3505 fpscr.qc = 1;
3506 destElem = ~srcElem1;
3507 } else {
3508 destElem = -srcElem1;
3509 }
3510 FpscrQc = fpscr;
3511 '''
3512 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3513 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3514
3515 vabsCode = '''
3516 if (srcElem1 < 0) {
3517 destElem = -srcElem1;
3518 } else {
3519 destElem = srcElem1;
3520 }
3521 '''
3522
3523 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3524 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3525 vabsfpCode = '''
3526 union
3527 {
3528 uint32_t i;
3529 float f;
3530 } cStruct;
3531 cStruct.f = srcReg1;
3532 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3533 destReg = cStruct.f;
3534 '''
3535 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3536 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3537
3538 vnegCode = '''
3539 destElem = -srcElem1;
3540 '''
3541 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3542 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3543 vnegfpCode = '''
3544 destReg = -srcReg1;
3545 '''
3546 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3547 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3548
3549 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3550 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3551 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3552 vcgtfpCode = '''
3553 FPSCR fpscr = (FPSCR) FpscrExc;
3554 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3555 true, true, VfpRoundNearest);
3556 destReg = (res == 0) ? -1 : 0;
3557 if (res == 2.0)
3558 fpscr.ioc = 1;
3559 FpscrExc = fpscr;
3560 '''
3561 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3562 2, vcgtfpCode, toInt = True)
3563 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3564 4, vcgtfpCode, toInt = True)
3565
3566 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3567 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3568 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3569 vcgefpCode = '''
3570 FPSCR fpscr = (FPSCR) FpscrExc;
3571 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3572 true, true, VfpRoundNearest);
3573 destReg = (res == 0) ? -1 : 0;
3574 if (res == 2.0)
3575 fpscr.ioc = 1;
3576 FpscrExc = fpscr;
3577 '''
3578 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3579 2, vcgefpCode, toInt = True)
3580 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3581 4, vcgefpCode, toInt = True)
3582
3583 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3584 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3585 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3586 vceqfpCode = '''
3587 FPSCR fpscr = (FPSCR) FpscrExc;
3588 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3589 true, true, VfpRoundNearest);
3590 destReg = (res == 0) ? -1 : 0;
3591 if (res == 2.0)
3592 fpscr.ioc = 1;
3593 FpscrExc = fpscr;
3594 '''
3595 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3596 2, vceqfpCode, toInt = True)
3597 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3598 4, vceqfpCode, toInt = True)
3599
3600 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3601 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3602 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3603 vclefpCode = '''
3604 FPSCR fpscr = (FPSCR) FpscrExc;
3605 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3606 true, true, VfpRoundNearest);
3607 destReg = (res == 0) ? -1 : 0;
3608 if (res == 2.0)
3609 fpscr.ioc = 1;
3610 FpscrExc = fpscr;
3611 '''
3612 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3613 2, vclefpCode, toInt = True)
3614 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3615 4, vclefpCode, toInt = True)
3616
3617 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3618 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3619 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3620 vcltfpCode = '''
3621 FPSCR fpscr = (FPSCR) FpscrExc;
3622 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3623 true, true, VfpRoundNearest);
3624 destReg = (res == 0) ? -1 : 0;
3625 if (res == 2.0)
3626 fpscr.ioc = 1;
3627 FpscrExc = fpscr;
3628 '''
3629 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3630 2, vcltfpCode, toInt = True)
3631 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3632 4, vcltfpCode, toInt = True)
3633
3634 vswpCode = '''
3635 FloatRegBits mid;
3636 for (unsigned r = 0; r < rCount; r++) {
3637 mid = srcReg1.regs[r];
3638 srcReg1.regs[r] = destReg.regs[r];
3639 destReg.regs[r] = mid;
3640 }
3641 '''
3642 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3643 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3644
3645 vtrnCode = '''
3646 Element mid;
3647 for (unsigned i = 0; i < eCount; i += 2) {
3648 mid = srcReg1.elements[i];
3649 srcReg1.elements[i] = destReg.elements[i + 1];
3650 destReg.elements[i + 1] = mid;
3651 }
3652 '''
3653 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3654 smallUnsignedTypes, 2, vtrnCode)
3655 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3656 smallUnsignedTypes, 4, vtrnCode)
3657
3658 vuzpCode = '''
3659 Element mid[eCount];
3660 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3661 for (unsigned i = 0; i < eCount / 2; i++) {
3662 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3663 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3664 destReg.elements[i] = destReg.elements[2 * i];
3665 }
3666 for (unsigned i = 0; i < eCount / 2; i++) {
3667 destReg.elements[eCount / 2 + i] = mid[2 * i];
3668 }
3669 '''
3670 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3671 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3672
# Interleave (zip) the elements of the two registers in place.
#
# The destination register is snapshotted into mid first because the first
# loop overwrites it. After the snippet runs:
#   destReg = { d0, s0, d1, s1, ... }      (low halves interleaved)
#   srcReg1 = { dH, sH, dH+1, sH+1, ... }  (high halves interleaved)
# where d* are the original destination elements, s* the original source
# elements and H = eCount / 2.
#
# The second loop rewrites srcReg1 while still reading it; this works
# because the element read at index H + i is never at a lower index than
# the ones being written (2i and 2i + 1).
#
# Both loops use an unsigned index, matching the first loop and the other
# eCount-bounded loops in this file; the second loop used to declare
# "int i", which mixed signed and unsigned in the comparison.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3687
3688 vmovnCode = 'destElem = srcElem1;'
3689 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3690
3691 vdupCode = 'destElem = srcElem1;'
3692 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3693 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3694
def vdupGprInst(name, Name, opClass, types, rCount):
    """Emit a NEON instruction that copies Op1 into every vector element.

    name    -- mnemonic handed to InstObjParams.
    Name    -- generated class name; also used for the per-type
               instantiations appended to exec_output.
    opClass -- value stored under "op_class".
    types   -- element types; one NeonExecDeclare instantiation is emitted
               for each.
    rCount  -- number of destination register pieces (FpDestP0 ...) that
               are written back; also stored under "r_count".

    Appends to the module-level header_output and exec_output.
    """
    global header_output, exec_output

    # Fill the whole vector with the (endian-converted) scalar operand.
    broadcast = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register piece.
    storePiece = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        '''
    body = simdEnabledCheckCode + broadcast
    body += "".join(storePiece % {"reg": idx} for idx in range(rCount))

    params = {
        "code": body,
        "r_count": rCount,
        "predicate_test": predicateTest,
        "op_class": opClass,
    }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])

    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Explicit instantiation of the execute template for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
3719 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3720 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3721
3722 vmovCode = 'destElem = imm;'
3723 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3724 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3725
3726 vorrCode = 'destElem |= imm;'
3727 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3728 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3729
3730 vmvnCode = 'destElem = ~imm;'
3731 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3732 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3733
3734 vbicCode = 'destElem &= ~imm;'
3735 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3736 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3737
3738 vqmovnCode = '''
3739 FPSCR fpscr = (FPSCR) FpscrQc;
3740 destElem = srcElem1;
3741 if ((BigElement)destElem != srcElem1) {
3742 fpscr.qc = 1;
3743 destElem = mask(sizeof(Element) * 8 - 1);
3744 if (srcElem1 < 0)
3745 destElem = ~destElem;
3746 }
3747 FpscrQc = fpscr;
3748 '''
3749 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3750
3751 vqmovunCode = '''
3752 FPSCR fpscr = (FPSCR) FpscrQc;
3753 destElem = srcElem1;
3754 if ((BigElement)destElem != srcElem1) {
3755 fpscr.qc = 1;
3756 destElem = mask(sizeof(Element) * 8);
3757 }
3758 FpscrQc = fpscr;
3759 '''
3760 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3761 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3762
3763 vqmovunsCode = '''
3764 FPSCR fpscr = (FPSCR) FpscrQc;
3765 destElem = srcElem1;
3766 if (srcElem1 < 0 ||
3767 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3768 fpscr.qc = 1;
3769 destElem = mask(sizeof(Element) * 8);
3770 if (srcElem1 < 0)
3771 destElem = ~destElem;
3772 }
3773 FpscrQc = fpscr;
3774 '''
3775 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3776 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3777
def buildVext(name, Name, opClass, types, rCount, op):
    """Emit a two-source, one-destination NEON instruction with an immediate.

    The generated code loads rCount register pieces into srcReg1 and
    srcReg2, runs the caller-supplied C++ snippet `op` (which is expected
    to fill destReg), and then writes destReg back piece by piece.

    name    -- mnemonic handed to InstObjParams.
    Name    -- generated class name; also used for the per-type
               instantiations appended to exec_output.
    opClass -- value stored under "op_class".
    types   -- element types; one NeonExecDeclare instantiation is emitted
               for each.
    rCount  -- number of register pieces per operand; also stored under
               "r_count".
    op      -- C++ source inserted between the loads and the write-back.

    Appends to the module-level header_output and exec_output.
    """
    global header_output, exec_output

    loadPiece = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        '''
    storePiece = '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        '''
    pieces = range(rCount)

    body = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    body += "".join(loadPiece % {"reg": idx} for idx in pieces)
    body += op
    body += "".join(storePiece % {"reg": idx} for idx in pieces)

    params = {
        "code": body,
        "r_count": rCount,
        "predicate_test": predicateTest,
        "op_class": opClass,
    }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", params, [])

    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Explicit instantiation of the execute template for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
3805
3806 vextCode = '''
3807 for (unsigned i = 0; i < eCount; i++) {
3808 unsigned index = i + imm;
3809 if (index < eCount) {
3810 destReg.elements[i] = srcReg1.elements[index];
3811 } else {
3812 index -= eCount;
3813 if (index >= eCount) {
3814 fault = std::make_shared<UndefinedInstruction>(machInst,
3815 false,
3816 mnemonic);
3817 } else {
3818 destReg.elements[i] = srcReg2.elements[index];
3819 }
3820 }
3821 }
3822 '''
3823 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3824 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3825
def buildVtbxl(name, Name, opClass, length, isVtbl):
    """Emit one table-lookup instruction (the vtbl or vtbx flavour).

    name    -- mnemonic handed to InstObjParams.
    Name    -- generated class name.
    opClass -- value stored under "op_class".
    length  -- table length; the first 2 * length table.regs entries are
               loaded from FpOp1P<n>_uw and the rest are zeroed, and a
               byte index is in range when it is below 8 * length.
    isVtbl  -- C++ boolean literal as a string ("true" or "false"); it is
               pasted verbatim into the generated code. When true,
               out-of-range indices produce 0; when false the destination
               byte keeps its previous value.

    Appends to the module-level header_output, decoder_output and
    exec_output.
    """
    global header_output, decoder_output, exec_output

    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % {"length": length, "isVtbl": isVtbl}

    # Populate the eight table slots: live ones come from the first source
    # operand, unused ones are cleared.
    liveSlot = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
    deadSlot = 'table.regs[%(reg)d] = 0;\n'
    usedSlots = length * 2
    tableSetup = "".join(
        (liveSlot if slot < usedSlots else deadSlot) % {"reg": slot}
        for slot in range(8))

    lookup = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''

    code = simdEnabledCheckCode + prologue + tableSetup + lookup
    params = {
        "code": code,
        "predicate_test": predicateTest,
        "op_class": opClass,
    }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])

    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3879
3880 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3881 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3882 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3883 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3884
3885 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3886 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3887 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3888 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3889}};
2968 '''
2969 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2970 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2971
# vqshl (immediate, signed elements): left shift by imm with saturation.
#  - imm >= element width: a nonzero source saturates (most-negative value,
#    complemented to most-positive when the source is > 0) and sets fpscr.qc;
#    a zero source gives zero.
#  - 0 < imm < width: the top imm + 1 source bits must be all zeros or all
#    ones, otherwise the result saturates the same way and qc is set.
#  - imm == 0: the source is copied.
# Registered as NVqshlD / NVqshlQ over signedTypes.
2972 vqshlCode = '''
2973 FPSCR fpscr = (FPSCR) FpscrQc;
2974 if (imm >= sizeof(Element) * 8) {
2975 if (srcElem1 != 0) {
2976 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2977 if (srcElem1 > 0)
2978 destElem = ~destElem;
2979 fpscr.qc = 1;
2980 } else {
2981 destElem = 0;
2982 }
2983 } else if (imm) {
2984 destElem = (srcElem1 << imm);
2985 uint64_t topBits = bits((uint64_t)srcElem1,
2986 sizeof(Element) * 8 - 1,
2987 sizeof(Element) * 8 - 1 - imm);
2988 if (topBits != 0 && topBits != mask(imm + 1)) {
2989 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2990 if (srcElem1 > 0)
2991 destElem = ~destElem;
2992 fpscr.qc = 1;
2993 }
2994 } else {
2995 destElem = srcElem1;
2996 }
2997 FpscrQc = fpscr;
2998 '''
2999 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3000 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3001
# vqshlu over unsignedTypes: left shift by imm with saturation to all ones.
#  - imm >= element width: a nonzero source yields mask(width) and sets
#    fpscr.qc; zero stays zero.
#  - 0 < imm < width: if any of the top imm source bits is set, the result is
#    mask(width) and qc is set; otherwise the plain shifted value is kept.
#  - imm == 0: the source is copied.
3002 vqshluCode = '''
3003 FPSCR fpscr = (FPSCR) FpscrQc;
3004 if (imm >= sizeof(Element) * 8) {
3005 if (srcElem1 != 0) {
3006 destElem = mask(sizeof(Element) * 8);
3007 fpscr.qc = 1;
3008 } else {
3009 destElem = 0;
3010 }
3011 } else if (imm) {
3012 destElem = (srcElem1 << imm);
3013 uint64_t topBits = bits((uint64_t)srcElem1,
3014 sizeof(Element) * 8 - 1,
3015 sizeof(Element) * 8 - imm);
3016 if (topBits != 0) {
3017 destElem = mask(sizeof(Element) * 8);
3018 fpscr.qc = 1;
3019 }
3020 } else {
3021 destElem = srcElem1;
3022 }
3023 FpscrQc = fpscr;
3024 '''
3025 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3026 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3027
# vqshlus over signedTypes: left shift by imm of a signed source, saturating
# to the range [0, mask(width)].
#  - A negative source always produces 0 and sets fpscr.qc, in every branch
#    including imm == 0.
#  - imm >= width: a positive source produces mask(width) with qc set.
#  - 0 < imm < width: a non-negative source whose top imm bits are not all
#    zero produces mask(width) with qc set; otherwise the shifted value.
#  - imm == 0: a non-negative source is copied.
3028 vqshlusCode = '''
3029 FPSCR fpscr = (FPSCR) FpscrQc;
3030 if (imm >= sizeof(Element) * 8) {
3031 if (srcElem1 < 0) {
3032 destElem = 0;
3033 fpscr.qc = 1;
3034 } else if (srcElem1 > 0) {
3035 destElem = mask(sizeof(Element) * 8);
3036 fpscr.qc = 1;
3037 } else {
3038 destElem = 0;
3039 }
3040 } else if (imm) {
3041 destElem = (srcElem1 << imm);
3042 uint64_t topBits = bits((uint64_t)srcElem1,
3043 sizeof(Element) * 8 - 1,
3044 sizeof(Element) * 8 - imm);
3045 if (srcElem1 < 0) {
3046 destElem = 0;
3047 fpscr.qc = 1;
3048 } else if (topBits != 0) {
3049 destElem = mask(sizeof(Element) * 8);
3050 fpscr.qc = 1;
3051 }
3052 } else {
3053 if (srcElem1 < 0) {
3054 fpscr.qc = 1;
3055 destElem = 0;
3056 } else {
3057 destElem = srcElem1;
3058 }
3059 }
3060 FpscrQc = fpscr;
3061 '''
3062 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3063 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3064
# vshrn: right shift of the source element by imm, assigned to the narrower
# destination element. A shift of at least the source width yields 0.
3065 vshrnCode = '''
3066 if (imm >= sizeof(srcElem1) * 8) {
3067 destElem = 0;
3068 } else {
3069 destElem = srcElem1 >> imm;
3070 }
3071 '''
3072 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3073
# vrshrn: rounding right shift by imm. Bit (imm - 1) of the source is added
# to the shifted value. The shift is done as (imm - 1) followed by 1, so the
# total shift amount never reaches the operand width in a single shift.
# imm greater than the source width yields 0; imm == 0 copies the source.
3074 vrshrnCode = '''
3075 if (imm > sizeof(srcElem1) * 8) {
3076 destElem = 0;
3077 } else if (imm) {
3078 Element rBit = bits(srcElem1, imm - 1);
3079 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3080 } else {
3081 destElem = srcElem1;
3082 }
3083 '''
3084 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3085
# vqshrn over smallSignedTypes: right shift by imm, narrowed with signed
# saturation.
#  - imm > source width: result 0; fpscr.qc is set unless the source is 0
#    or -1.
#  - imm != 0: the source is shifted (in two steps) into mid, and the bit at
#    position (BigElement width - 1 - imm) is propagated into all higher
#    bits. If mid does not survive a round trip through Element, the result
#    saturates to mask(width - 1), complemented for a negative source, and
#    qc is set.
#  - imm == 0: the source is assigned directly.
3086 vqshrnCode = '''
3087 FPSCR fpscr = (FPSCR) FpscrQc;
3088 if (imm > sizeof(srcElem1) * 8) {
3089 if (srcElem1 != 0 && srcElem1 != -1)
3090 fpscr.qc = 1;
3091 destElem = 0;
3092 } else if (imm) {
3093 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3094 mid |= -(mid & ((BigElement)1 <<
3095 (sizeof(BigElement) * 8 - 1 - imm)));
3096 if (mid != (Element)mid) {
3097 destElem = mask(sizeof(Element) * 8 - 1);
3098 if (srcElem1 < 0)
3099 destElem = ~destElem;
3100 fpscr.qc = 1;
3101 } else {
3102 destElem = mid;
3103 }
3104 } else {
3105 destElem = srcElem1;
3106 }
3107 FpscrQc = fpscr;
3108 '''
3109 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3110
# vqshrun over smallUnsignedTypes (class NVqshrun): right shift by imm,
# narrowed with saturation to mask(width).
#  - imm > source width: result 0; fpscr.qc is set for a nonzero source.
#  - imm != 0: if the shifted value does not fit Element, the result is
#    mask(width) and qc is set.
#  - imm == 0: the source is assigned directly, with no saturation check.
3111 vqshrunCode = '''
3112 FPSCR fpscr = (FPSCR) FpscrQc;
3113 if (imm > sizeof(srcElem1) * 8) {
3114 if (srcElem1 != 0)
3115 fpscr.qc = 1;
3116 destElem = 0;
3117 } else if (imm) {
3118 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3119 if (mid != (Element)mid) {
3120 destElem = mask(sizeof(Element) * 8);
3121 fpscr.qc = 1;
3122 } else {
3123 destElem = mid;
3124 }
3125 } else {
3126 destElem = srcElem1;
3127 }
3128 FpscrQc = fpscr;
3129 '''
3130 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3131 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3132
# vqshrun over smallSignedTypes (class NVqshruns): right shift by imm of a
# signed source, narrowed with saturation to [0, mask(width)].
#  - imm > source width: result 0; fpscr.qc is set for a nonzero source.
#  - imm != 0: if any bit of the shifted value at or above the Element width
#    is set, the result is 0 for a negative source and mask(width)
#    otherwise, and qc is set.
#  - imm == 0: the source is assigned directly.
3133 vqshrunsCode = '''
3134 FPSCR fpscr = (FPSCR) FpscrQc;
3135 if (imm > sizeof(srcElem1) * 8) {
3136 if (srcElem1 != 0)
3137 fpscr.qc = 1;
3138 destElem = 0;
3139 } else if (imm) {
3140 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3141 if (bits(mid, sizeof(BigElement) * 8 - 1,
3142 sizeof(Element) * 8) != 0) {
3143 if (srcElem1 < 0) {
3144 destElem = 0;
3145 } else {
3146 destElem = mask(sizeof(Element) * 8);
3147 }
3148 fpscr.qc = 1;
3149 } else {
3150 destElem = mid;
3151 }
3152 } else {
3153 destElem = srcElem1;
3154 }
3155 FpscrQc = fpscr;
3156 '''
3157 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3158 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3159
# vqrshrn over smallSignedTypes: rounding right shift by imm, narrowed with
# signed saturation.
#  - imm > source width: result 0; fpscr.qc is set unless the source is 0
#    or -1.
#  - imm != 0: the last bit shifted out (rBit) is added after the shifted
#    value has had its bit at (BigElement width - 1 - imm) propagated
#    upward. A value that does not fit Element saturates to mask(width - 1),
#    complemented for a negative source, with qc set.
#  - imm == 0: the same saturation check is applied to the unshifted source.
3160 vqrshrnCode = '''
3161 FPSCR fpscr = (FPSCR) FpscrQc;
3162 if (imm > sizeof(srcElem1) * 8) {
3163 if (srcElem1 != 0 && srcElem1 != -1)
3164 fpscr.qc = 1;
3165 destElem = 0;
3166 } else if (imm) {
3167 BigElement mid = (srcElem1 >> (imm - 1));
3168 uint64_t rBit = mid & 0x1;
3169 mid >>= 1;
3170 mid |= -(mid & ((BigElement)1 <<
3171 (sizeof(BigElement) * 8 - 1 - imm)));
3172 mid += rBit;
3173 if (mid != (Element)mid) {
3174 destElem = mask(sizeof(Element) * 8 - 1);
3175 if (srcElem1 < 0)
3176 destElem = ~destElem;
3177 fpscr.qc = 1;
3178 } else {
3179 destElem = mid;
3180 }
3181 } else {
3182 if (srcElem1 != (Element)srcElem1) {
3183 destElem = mask(sizeof(Element) * 8 - 1);
3184 if (srcElem1 < 0)
3185 destElem = ~destElem;
3186 fpscr.qc = 1;
3187 } else {
3188 destElem = srcElem1;
3189 }
3190 }
3191 FpscrQc = fpscr;
3192 '''
3193 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3194 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3195
# vqrshrun over smallUnsignedTypes (class NVqrshrun): rounding right shift by
# imm, narrowed with unsigned saturation.
#  - imm > source width: result 0; fpscr.qc is set for a nonzero source.
#  - imm != 0: the last bit shifted out (rBit) is added to the shifted value;
#    a value that does not fit Element saturates to mask(width) with qc set.
#  - imm == 0: the unshifted source gets the same saturation check. The
#    saturated value is mask(width) (all ones), matching the shifted branch;
#    it previously used mask(width - 1), which is the signed maximum and is
#    wrong for an unsigned destination.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3225
# vqrshrun over smallSignedTypes (class NVqrshruns): rounding right shift by
# imm of a signed source, narrowed with saturation to [0, mask(width)].
#  - imm > source width: result 0; fpscr.qc is set for a nonzero source.
#  - imm != 0: after the shift, upward bit propagation and rounding, any set
#    bit at or above the Element width saturates the result (0 for a
#    negative source, mask(width) otherwise) and sets qc.
#  - imm == 0: a negative source yields 0 with qc set; otherwise the source
#    is assigned directly.
3226 vqrshrunsCode = '''
3227 FPSCR fpscr = (FPSCR) FpscrQc;
3228 if (imm > sizeof(srcElem1) * 8) {
3229 if (srcElem1 != 0)
3230 fpscr.qc = 1;
3231 destElem = 0;
3232 } else if (imm) {
3233 BigElement mid = (srcElem1 >> (imm - 1));
3234 uint64_t rBit = mid & 0x1;
3235 mid >>= 1;
3236 mid |= -(mid & ((BigElement)1 <<
3237 (sizeof(BigElement) * 8 - 1 - imm)));
3238 mid += rBit;
3239 if (bits(mid, sizeof(BigElement) * 8 - 1,
3240 sizeof(Element) * 8) != 0) {
3241 if (srcElem1 < 0) {
3242 destElem = 0;
3243 } else {
3244 destElem = mask(sizeof(Element) * 8);
3245 }
3246 fpscr.qc = 1;
3247 } else {
3248 destElem = mid;
3249 }
3250 } else {
3251 if (srcElem1 < 0) {
3252 fpscr.qc = 1;
3253 destElem = 0;
3254 } else {
3255 destElem = srcElem1;
3256 }
3257 }
3258 FpscrQc = fpscr;
3259 '''
3260 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3261 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3262
# vshll: widening left shift. The source is cast to BigElement before the
# shift; a shift of at least the destination width yields 0.
3263 vshllCode = '''
3264 if (imm >= sizeof(destElem) * 8) {
3265 destElem = 0;
3266 } else {
3267 destElem = (BigElement)srcElem1 << imm;
3268 }
3269 '''
3270 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3271
# vmovl: plain element copy, registered through the same long-shift builder
# as vshll but with op class "SimdMiscOp".
3272 vmovlCode = '''
3273 destElem = srcElem1;
3274 '''
3275 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3276
# vcvt float -> fixed point (classes NVcvt2ufxD/Q). Sets fpscr.idc when
# flushToZero(srcElem1) reports true, then converts with
# vfpFpToFixed<float>(srcElem1, false, 32, imm) between prepFpState and
# finishVfp.
# NOTE(review): the `false` argument presumably selects an unsigned result,
# and the empty asm statements with memory constraints presumably keep the
# compiler from moving the conversion across the FP-state calls - confirm
# against the helper definitions.
3277 vcvt2ufxCode = '''
3278 FPSCR fpscr = (FPSCR) FpscrExc;
3279 if (flushToZero(srcElem1))
3280 fpscr.idc = 1;
3281 VfpSavedState state = prepFpState(VfpRoundNearest);
3282 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3283 destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3284 __asm__ __volatile__("" :: "m" (destReg));
3285 finishVfp(fpscr, state, true);
3286 FpscrExc = fpscr;
3287 '''
3288 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3289 2, vcvt2ufxCode, toInt = True)
3290 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3291 4, vcvt2ufxCode, toInt = True)
3292
# vcvt float -> fixed point (classes NVcvt2sfxD/Q). Identical to the
# NVcvt2ufx template except that vfpFpToFixed<float> is called with `true`
# as its second argument.
# NOTE(review): that flag presumably selects a signed result - confirm.
3293 vcvt2sfxCode = '''
3294 FPSCR fpscr = (FPSCR) FpscrExc;
3295 if (flushToZero(srcElem1))
3296 fpscr.idc = 1;
3297 VfpSavedState state = prepFpState(VfpRoundNearest);
3298 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3299 destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3300 __asm__ __volatile__("" :: "m" (destReg));
3301 finishVfp(fpscr, state, true);
3302 FpscrExc = fpscr;
3303 '''
3304 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3305 2, vcvt2sfxCode, toInt = True)
3306 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3307 4, vcvt2sfxCode, toInt = True)
3308
# vcvt fixed point -> float (classes NVcvtu2fpD/Q). Converts srcReg1 with
# vfpUFixedToFpS(true, true, srcReg1, 32, imm) between prepFpState and
# finishVfp, and writes the updated fpscr back to FpscrExc.
3309 vcvtu2fpCode = '''
3310 FPSCR fpscr = (FPSCR) FpscrExc;
3311 VfpSavedState state = prepFpState(VfpRoundNearest);
3312 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3313 destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3314 __asm__ __volatile__("" :: "m" (destElem));
3315 finishVfp(fpscr, state, true);
3316 FpscrExc = fpscr;
3317 '''
3318 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3319 2, vcvtu2fpCode, fromInt = True)
3320 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3321 4, vcvtu2fpCode, fromInt = True)
3322
# vcvt fixed point -> float (classes NVcvts2fpD/Q). Same shape as the
# NVcvtu2fp template, but converts with vfpSFixedToFpS.
3323 vcvts2fpCode = '''
3324 FPSCR fpscr = (FPSCR) FpscrExc;
3325 VfpSavedState state = prepFpState(VfpRoundNearest);
3326 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3327 destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3328 __asm__ __volatile__("" :: "m" (destElem));
3329 finishVfp(fpscr, state, true);
3330 FpscrExc = fpscr;
3331 '''
3332 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3333 2, vcvts2fpCode, fromInt = True)
3334 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3335 4, vcvts2fpCode, fromInt = True)
3336
# vcvt narrowing conversion (class NVcvts2h, element type uint16_t). The
# source bits are reinterpreted as a float via bitsToFp, fpscr.idc is set
# when flushToZero reports true, and the result comes from vcvtFpSFpH called
# with VfpRoundNearest and fpscr.ahp.
3337 vcvts2hCode = '''
3338 destElem = 0;
3339 FPSCR fpscr = (FPSCR) FpscrExc;
3340 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3341 if (flushToZero(srcFp1))
3342 fpscr.idc = 1;
3343 VfpSavedState state = prepFpState(VfpRoundNearest);
3344 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3345 : "m" (srcFp1), "m" (destElem));
3346 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3347 fpscr.ahp, srcFp1);
3348 __asm__ __volatile__("" :: "m" (destElem));
3349 finishVfp(fpscr, state, true);
3350 FpscrExc = fpscr;
3351 '''
3352 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3353
# vcvt widening conversion (class NVcvth2s, element type uint16_t). The
# result is the bit pattern (fpToBits) of
# vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1).
3354 vcvth2sCode = '''
3355 destElem = 0;
3356 FPSCR fpscr = (FPSCR) FpscrExc;
3357 VfpSavedState state = prepFpState(VfpRoundNearest);
3358 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3359 : "m" (srcElem1), "m" (destElem));
3360 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3361 __asm__ __volatile__("" :: "m" (destElem));
3362 finishVfp(fpscr, state, true);
3363 FpscrExc = fpscr;
3364 '''
3365 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3366
# vrsqrte on uint32_t elements: delegates to unsignedRSqrtEstimate.
3367 vrsqrteCode = '''
3368 destElem = unsignedRSqrtEstimate(srcElem1);
3369 '''
3370 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3371 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3372
# vrsqrte on float registers: sets fpscr.idc when flushToZero(srcReg1)
# reports true, then delegates to fprSqrtEstimate(fpscr, srcReg1).
3373 vrsqrtefpCode = '''
3374 FPSCR fpscr = (FPSCR) FpscrExc;
3375 if (flushToZero(srcReg1))
3376 fpscr.idc = 1;
3377 destReg = fprSqrtEstimate(fpscr, srcReg1);
3378 FpscrExc = fpscr;
3379 '''
3380 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3381 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3382
# vrecpe on uint32_t elements: delegates to unsignedRecipEstimate. Note the
# op class used here is "SimdMultAccOp".
3383 vrecpeCode = '''
3384 destElem = unsignedRecipEstimate(srcElem1);
3385 '''
3386 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3387 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3388
# vrecpe on float registers: sets fpscr.idc when flushToZero(srcReg1)
# reports true, then delegates to fpRecipEstimate(fpscr, srcReg1).
3389 vrecpefpCode = '''
3390 FPSCR fpscr = (FPSCR) FpscrExc;
3391 if (flushToZero(srcReg1))
3392 fpscr.idc = 1;
3393 destReg = fpRecipEstimate(fpscr, srcReg1);
3394 FpscrExc = fpscr;
3395 '''
3396 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3397 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3398
# vrev16: copies the element and sets j = i ^ (groupSize - 1), where
# groupSize is the number of elements in a 2-byte group.
# NOTE(review): i and j are not declared in this snippet; presumably they
# are the source and destination element indices supplied by the
# twoRegMiscInst template - confirm.
3399 vrev16Code = '''
3400 destElem = srcElem1;
3401 unsigned groupSize = ((1 << 1) / sizeof(Element));
3402 unsigned reverseMask = (groupSize - 1);
3403 j = i ^ reverseMask;
3404 '''
3405 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3406 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# vrev32: same index remapping as vrev16 but with 4-byte groups
# ((1 << 2) bytes); registered for uint8_t and uint16_t elements.
3407 vrev32Code = '''
3408 destElem = srcElem1;
3409 unsigned groupSize = ((1 << 2) / sizeof(Element));
3410 unsigned reverseMask = (groupSize - 1);
3411 j = i ^ reverseMask;
3412 '''
3413 twoRegMiscInst("vrev32", "NVrev32D",
3414 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3415 twoRegMiscInst("vrev32", "NVrev32Q",
3416 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# vrev64: same index remapping with 8-byte groups ((1 << 3) bytes);
# registered over smallUnsignedTypes.
3417 vrev64Code = '''
3418 destElem = srcElem1;
3419 unsigned groupSize = ((1 << 3) / sizeof(Element));
3420 unsigned reverseMask = (groupSize - 1);
3421 j = i ^ reverseMask;
3422 '''
3423 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3424 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3425
# Call split('exec'), then append the vcompares and vcomparesL text (both
# defined outside this section) to exec_output.
# NOTE(review): split('exec') presumably starts a new generated exec output
# file - confirm against the ISA parser.
3426 split('exec')
3427 exec_output += vcompares + vcomparesL
3428
# vpaddl: sum of two source elements, each widened to BigElement before the
# addition.
3429 vpaddlCode = '''
3430 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3431 '''
3432 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3433 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3434
# vpadal: like vpaddl, but the widened pair sum is accumulated into destElem.
# NOTE(review): the trailing True presumably tells twoRegCondenseInst to read
# the destination first - confirm.
3435 vpadalCode = '''
3436 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3437 '''
3438 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3439 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3440
# vcls: counts how many bits directly below the sign bit equal the sign bit.
# The source is shifted left once to drop the sign bit, then shifted further
# while the new top bit still matches the original sign; the count is capped
# at width - 1.
3441 vclsCode = '''
3442 unsigned count = 0;
3443 if (srcElem1 < 0) {
3444 srcElem1 <<= 1;
3445 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3446 count++;
3447 srcElem1 <<= 1;
3448 }
3449 } else {
3450 srcElem1 <<= 1;
3451 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3452 count++;
3453 srcElem1 <<= 1;
3454 }
3455 }
3456 destElem = count;
3457 '''
3458 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3459 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3460
# vclz: counts leading zero bits by shifting left until the (signed) element
# becomes negative, i.e. its top bit is set; the count is capped at the
# element width, which is the result for a zero source.
3461 vclzCode = '''
3462 unsigned count = 0;
3463 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3464 count++;
3465 srcElem1 <<= 1;
3466 }
3467 destElem = count;
3468 '''
3469 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3470 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3471
# vcnt: population count. Adds the low bit to the count and shifts right
# until the element is zero.
3472 vcntCode = '''
3473 unsigned count = 0;
3474 while (srcElem1 && count < sizeof(Element) * 8) {
3475 count += srcElem1 & 0x1;
3476 srcElem1 >>= 1;
3477 }
3478 destElem = count;
3479 '''
3480
3481 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3482 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3483
# vmvn (register form): bitwise complement, performed on uint64_t elements.
3484 vmvnCode = '''
3485 destElem = ~srcElem1;
3486 '''
3487 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3488 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3489
# vqabs: saturating absolute value. The most-negative element (only the top
# bit set) cannot be negated, so it produces its complement (the
# most-positive value) and sets fpscr.qc.
3490 vqabsCode = '''
3491 FPSCR fpscr = (FPSCR) FpscrQc;
3492 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3493 fpscr.qc = 1;
3494 destElem = ~srcElem1;
3495 } else if (srcElem1 < 0) {
3496 destElem = -srcElem1;
3497 } else {
3498 destElem = srcElem1;
3499 }
3500 FpscrQc = fpscr;
3501 '''
3502 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3503 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3504
# vqneg: saturating negation. The most-negative element produces its
# complement (the most-positive value) and sets fpscr.qc; every other value
# is negated.
3505 vqnegCode = '''
3506 FPSCR fpscr = (FPSCR) FpscrQc;
3507 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3508 fpscr.qc = 1;
3509 destElem = ~srcElem1;
3510 } else {
3511 destElem = -srcElem1;
3512 }
3513 FpscrQc = fpscr;
3514 '''
3515 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3516 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3517
# vabs (integer): non-saturating absolute value of a signed element.
3518 vabsCode = '''
3519 if (srcElem1 < 0) {
3520 destElem = -srcElem1;
3521 } else {
3522 destElem = srcElem1;
3523 }
3524 '''
3525
3526 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3527 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs (float): clears the top bit of the float's 32-bit representation by
# writing the value through a uint32_t/float union and masking with
# mask(sizeof(Element) * 8 - 1).
3528 vabsfpCode = '''
3529 union
3530 {
3531 uint32_t i;
3532 float f;
3533 } cStruct;
3534 cStruct.f = srcReg1;
3535 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3536 destReg = cStruct.f;
3537 '''
3538 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3539 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3540
# vneg (integer): non-saturating negation of a signed element.
3541 vnegCode = '''
3542 destElem = -srcElem1;
3543 '''
3544 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3545 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg (float): negation of the float source register value.
3546 vnegfpCode = '''
3547 destReg = -srcReg1;
3548 '''
3549 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3550 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3551
# vcgt against zero.
# Integer form: all-ones mask when the signed element is > 0, else 0.
# Float form: runs binaryOp with vcgtFunc against 0.0; a result of 0 writes
# -1 to destReg and anything else writes 0; a result of 2.0 also sets
# fpscr.ioc.
# NOTE(review): the 0 / 2.0 result encoding comes from vcgtFunc, which is
# defined elsewhere - confirm there.
3552 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3553 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3554 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3555 vcgtfpCode = '''
3556 FPSCR fpscr = (FPSCR) FpscrExc;
3557 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3558 true, true, VfpRoundNearest);
3559 destReg = (res == 0) ? -1 : 0;
3560 if (res == 2.0)
3561 fpscr.ioc = 1;
3562 FpscrExc = fpscr;
3563 '''
3564 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3565 2, vcgtfpCode, toInt = True)
3566 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3567 4, vcgtfpCode, toInt = True)
3568
# vcge against zero.
# Integer form: all-ones mask when the signed element is >= 0, else 0.
# Float form: binaryOp with vcgeFunc against 0.0; a result of 0 writes -1 to
# destReg, anything else writes 0; a result of 2.0 also sets fpscr.ioc.
3569 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3570 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3571 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3572 vcgefpCode = '''
3573 FPSCR fpscr = (FPSCR) FpscrExc;
3574 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3575 true, true, VfpRoundNearest);
3576 destReg = (res == 0) ? -1 : 0;
3577 if (res == 2.0)
3578 fpscr.ioc = 1;
3579 FpscrExc = fpscr;
3580 '''
3581 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3582 2, vcgefpCode, toInt = True)
3583 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3584 4, vcgefpCode, toInt = True)
3585
# vceq against zero.
# Integer form: all-ones mask when the element equals 0, else 0.
# Float form: binaryOp with vceqFunc against 0.0; a result of 0 writes -1 to
# destReg, anything else writes 0; a result of 2.0 also sets fpscr.ioc.
3586 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3587 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3588 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3589 vceqfpCode = '''
3590 FPSCR fpscr = (FPSCR) FpscrExc;
3591 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3592 true, true, VfpRoundNearest);
3593 destReg = (res == 0) ? -1 : 0;
3594 if (res == 2.0)
3595 fpscr.ioc = 1;
3596 FpscrExc = fpscr;
3597 '''
3598 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3599 2, vceqfpCode, toInt = True)
3600 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3601 4, vceqfpCode, toInt = True)
3602
# vcle against zero.
# Integer form: all-ones mask when the signed element is <= 0, else 0.
# Float form: binaryOp with vcleFunc against 0.0; a result of 0 writes -1 to
# destReg, anything else writes 0; a result of 2.0 also sets fpscr.ioc.
3603 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3604 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3605 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3606 vclefpCode = '''
3607 FPSCR fpscr = (FPSCR) FpscrExc;
3608 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3609 true, true, VfpRoundNearest);
3610 destReg = (res == 0) ? -1 : 0;
3611 if (res == 2.0)
3612 fpscr.ioc = 1;
3613 FpscrExc = fpscr;
3614 '''
3615 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3616 2, vclefpCode, toInt = True)
3617 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3618 4, vclefpCode, toInt = True)
3619
# vclt against zero.
# Integer form: all-ones mask when the signed element is < 0, else 0.
# Float form: binaryOp with vcltFunc against 0.0; a result of 0 writes -1 to
# destReg, anything else writes 0; a result of 2.0 also sets fpscr.ioc.
3620 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3621 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3622 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3623 vcltfpCode = '''
3624 FPSCR fpscr = (FPSCR) FpscrExc;
3625 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3626 true, true, VfpRoundNearest);
3627 destReg = (res == 0) ? -1 : 0;
3628 if (res == 2.0)
3629 fpscr.ioc = 1;
3630 FpscrExc = fpscr;
3631 '''
3632 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3633 2, vcltfpCode, toInt = True)
3634 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3635 4, vcltfpCode, toInt = True)
3636
# vswp: exchanges srcReg1 and destReg one register word at a time, for all
# rCount words, through the temporary mid.
3637 vswpCode = '''
3638 FloatRegBits mid;
3639 for (unsigned r = 0; r < rCount; r++) {
3640 mid = srcReg1.regs[r];
3641 srcReg1.regs[r] = destReg.regs[r];
3642 destReg.regs[r] = mid;
3643 }
3644 '''
3645 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3646 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3647
# vtrn: for every even index i, exchanges srcReg1.elements[i] with
# destReg.elements[i + 1]. The other elements of both registers are left
# untouched.
3648 vtrnCode = '''
3649 Element mid;
3650 for (unsigned i = 0; i < eCount; i += 2) {
3651 mid = srcReg1.elements[i];
3652 srcReg1.elements[i] = destReg.elements[i + 1];
3653 destReg.elements[i + 1] = mid;
3654 }
3655 '''
3656 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3657 smallUnsignedTypes, 2, vtrnCode)
3658 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3659 smallUnsignedTypes, 4, vtrnCode)
3660
# vuzp: de-interleaves the pair (destReg, srcReg1). mid holds a copy of the
# original srcReg1. Afterwards destReg contains the even-indexed elements of
# the original destReg followed by the even-indexed elements of the original
# srcReg1, and srcReg1 contains the odd-indexed elements in the same order.
# destReg's upper half is filled in a second loop so the first loop can still
# read the original destReg elements it needs.
3661 vuzpCode = '''
3662 Element mid[eCount];
3663 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3664 for (unsigned i = 0; i < eCount / 2; i++) {
3665 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3666 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3667 destReg.elements[i] = destReg.elements[2 * i];
3668 }
3669 for (unsigned i = 0; i < eCount / 2; i++) {
3670 destReg.elements[eCount / 2 + i] = mid[2 * i];
3671 }
3672 '''
3673 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3674 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3675
# vzip: interleaves the pair (destReg, srcReg1). mid holds a copy of the
# original destReg. Afterwards destReg contains the lower halves of the two
# original registers interleaved (destReg element first), and srcReg1
# contains the upper halves interleaved the same way.
# The second loop rewrites srcReg1 in place; this is safe because at step i
# it reads srcReg1.elements[eCount / 2 + i], which lies above every index
# written so far (at most 2 * i - 1).
# Both loops use an unsigned index to match eCount and the other element
# loops in this file (the second loop previously used int).
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3690
# vmovn: plain assignment of the source element to the narrower destination
# element (no saturation), registered through twoRegNarrowMiscInst.
3691 vmovnCode = 'destElem = srcElem1;'
3692 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3693
# vdup: per-element copy, registered through twoRegMiscScInst.
# NOTE(review): presumably that builder selects a single source element and
# replicates it - confirm in its definition.
3694 vdupCode = 'destElem = srcElem1;'
3695 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3696 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3697
# vdupGprInst(name, Name, opClass, types, rCount)
# Emits the class `Name` for a vdup whose source is Op1: every element of a
# local RegVect is set to htog((Element)Op1), then the first rCount register
# words are written back to FpDestP<reg>_uw via gtoh.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value for the "op_class" template key
#   types   - element types; one NeonExecDeclare instantiation is appended
#             to exec_output per type
#   rCount  - number of destination register words written
# Appends to the global header_output and exec_output.
3698 def vdupGprInst(name, Name, opClass, types, rCount):
3699 global header_output, exec_output
3700 eWalkCode = simdEnabledCheckCode + '''
3701 RegVect destReg;
3702 for (unsigned i = 0; i < eCount; i++) {
3703 destReg.elements[i] = htog((Element)Op1);
3704 }
3705 '''
3706 for reg in range(rCount):
3707 eWalkCode += '''
3708 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3709 ''' % { "reg" : reg }
3710 iop = InstObjParams(name, Name,
3711 "RegRegOp",
3712 { "code": eWalkCode,
3713 "r_count": rCount,
3714 "predicate_test": predicateTest,
3715 "op_class": opClass }, [])
3716 header_output += NeonRegRegOpDeclare.subst(iop)
3717 exec_output += NeonEqualRegExecute.subst(iop)
3718 for type in types:
3719 substDict = { "targs" : type,
3720 "class_name" : Name }
3721 exec_output += NeonExecDeclare.subst(substDict)
3722 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3723 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3724
# Immediate forms, all operating on uint64_t elements via oneRegImmInst:
#   vmov: destElem = imm
#   vorr: destElem |= imm
#   vmvn: destElem = ~imm  (rebinds the name vmvnCode)
#   vbic: destElem &= ~imm
# NOTE(review): the trailing True on vorr and vbic presumably makes the
# builder read the destination before the operation - confirm.
3725 vmovCode = 'destElem = imm;'
3726 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3727 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3728
3729 vorrCode = 'destElem |= imm;'
3730 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3731 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3732
3733 vmvnCode = 'destElem = ~imm;'
3734 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3735 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3736
3737 vbicCode = 'destElem &= ~imm;'
3738 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3739 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3740
# vqmovn over smallSignedTypes: narrowing move with signed saturation. If the
# narrowed value widened back to BigElement differs from the source, the
# result becomes mask(width - 1), complemented for a negative source, and
# fpscr.qc is set.
3741 vqmovnCode = '''
3742 FPSCR fpscr = (FPSCR) FpscrQc;
3743 destElem = srcElem1;
3744 if ((BigElement)destElem != srcElem1) {
3745 fpscr.qc = 1;
3746 destElem = mask(sizeof(Element) * 8 - 1);
3747 if (srcElem1 < 0)
3748 destElem = ~destElem;
3749 }
3750 FpscrQc = fpscr;
3751 '''
3752 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3753
# vqmovun over smallUnsignedTypes (class NVqmovun): narrowing move with
# saturation to mask(width). fpscr.qc is set when the narrowed value does
# not round-trip to the source.
3754 vqmovunCode = '''
3755 FPSCR fpscr = (FPSCR) FpscrQc;
3756 destElem = srcElem1;
3757 if ((BigElement)destElem != srcElem1) {
3758 fpscr.qc = 1;
3759 destElem = mask(sizeof(Element) * 8);
3760 }
3761 FpscrQc = fpscr;
3762 '''
3763 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3764 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3765
# vqmovun over smallSignedTypes (class NVqmovuns): narrowing move of a
# signed source with saturation to [0, mask(width)]. Saturation happens when
# the source is negative or when the low `width` bits of the narrowed value
# differ from the source: the result is mask(width), complemented (i.e. 0)
# for a negative source, and fpscr.qc is set.
3766 vqmovunsCode = '''
3767 FPSCR fpscr = (FPSCR) FpscrQc;
3768 destElem = srcElem1;
3769 if (srcElem1 < 0 ||
3770 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3771 fpscr.qc = 1;
3772 destElem = mask(sizeof(Element) * 8);
3773 if (srcElem1 < 0)
3774 destElem = ~destElem;
3775 }
3776 FpscrQc = fpscr;
3777 '''
3778 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3779 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3780
# buildVext(name, Name, opClass, types, rCount, op)
# Emits the class `Name` as a "RegRegRegImmOp". The generated code loads
# rCount register words of both sources into srcReg1/srcReg2 (htog), runs
# the caller-supplied C++ snippet `op`, then writes rCount words of destReg
# back to FpDestP<reg>_uw (gtoh).
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value for the "op_class" template key
#   types   - element types; one NeonExecDeclare instantiation is appended
#             to exec_output per type
#   rCount  - number of register words per operand
#   op      - C++ snippet that fills destReg from srcReg1/srcReg2
# Appends to the global header_output and exec_output.
3781 def buildVext(name, Name, opClass, types, rCount, op):
3782 global header_output, exec_output
3783 eWalkCode = simdEnabledCheckCode + '''
3784 RegVect srcReg1, srcReg2, destReg;
3785 '''
3786 for reg in range(rCount):
3787 eWalkCode += '''
3788 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3789 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3790 ''' % { "reg" : reg }
3791 eWalkCode += op
3792 for reg in range(rCount):
3793 eWalkCode += '''
3794 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3795 ''' % { "reg" : reg }
3796 iop = InstObjParams(name, Name,
3797 "RegRegRegImmOp",
3798 { "code": eWalkCode,
3799 "r_count": rCount,
3800 "predicate_test": predicateTest,
3801 "op_class": opClass }, [])
3802 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3803 exec_output += NeonEqualRegExecute.subst(iop)
3804 for type in types:
3805 substDict = { "targs" : type,
3806 "class_name" : Name }
3807 exec_output += NeonExecDeclare.subst(substDict)
3808
# vext: destination element i is taken from position i + imm of the
# concatenation srcReg1 followed by srcReg2. An index that falls beyond both
# registers sets `fault` to an UndefinedInstruction; the loop keeps running
# and that destination element is not written.
3809 vextCode = '''
3810 for (unsigned i = 0; i < eCount; i++) {
3811 unsigned index = i + imm;
3812 if (index < eCount) {
3813 destReg.elements[i] = srcReg1.elements[index];
3814 } else {
3815 index -= eCount;
3816 if (index >= eCount) {
3817 fault = std::make_shared<UndefinedInstruction>(machInst,
3818 false,
3819 mnemonic);
3820 } else {
3821 destReg.elements[i] = srcReg2.elements[index];
3822 }
3823 }
3824 }
3825 '''
3826 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3827 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3828
# buildVtbxl(name, Name, opClass, length, isVtbl)
# Emits the class `Name` as a "RegRegRegOp" performing a byte table lookup.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value for the "op_class" template key
#   length  - substituted with %d into the C++ constant `length`; the first
#             length * 2 table words are loaded from FpOp1P<reg>_uw and the
#             remaining words (up to 8) are zeroed
#   isVtbl  - text substituted with %s into the C++ constant `isVtbl`
#             (callers pass the strings "true" / "false")
# Generated behaviour: each byte of srcReg2 is an index. An index below
# 8 * length selects table.bytes[index]; otherwise the destination byte is
# zeroed when isVtbl is true and left unchanged when it is false, which is
# why destReg is first loaded from FpDestP0_uw / FpDestP1_uw.
# Appends to the global header_output, decoder_output and exec_output.
3829 def buildVtbxl(name, Name, opClass, length, isVtbl):
3830 global header_output, decoder_output, exec_output
3831 code = simdEnabledCheckCode + '''
3832 union
3833 {
3834 uint8_t bytes[32];
3835 FloatRegBits regs[8];
3836 } table;
3837
3838 union
3839 {
3840 uint8_t bytes[8];
3841 FloatRegBits regs[2];
3842 } destReg, srcReg2;
3843
3844 const unsigned length = %(length)d;
3845 const bool isVtbl = %(isVtbl)s;
3846
3847 srcReg2.regs[0] = htog(FpOp2P0_uw);
3848 srcReg2.regs[1] = htog(FpOp2P1_uw);
3849
3850 destReg.regs[0] = htog(FpDestP0_uw);
3851 destReg.regs[1] = htog(FpDestP1_uw);
3852 ''' % { "length" : length, "isVtbl" : isVtbl }
3853 for reg in range(8):
3854 if reg < length * 2:
3855 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3856 { "reg" : reg }
3857 else:
3858 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3859 code += '''
3860 for (unsigned i = 0; i < sizeof(destReg); i++) {
3861 uint8_t index = srcReg2.bytes[i];
3862 if (index < 8 * length) {
3863 destReg.bytes[i] = table.bytes[index];
3864 } else {
3865 if (isVtbl)
3866 destReg.bytes[i] = 0;
3867 // else destReg.bytes[i] unchanged
3868 }
3869 }
3870
3871 FpDestP0_uw = gtoh(destReg.regs[0]);
3872 FpDestP1_uw = gtoh(destReg.regs[1]);
3873 '''
3874 iop = InstObjParams(name, Name,
3875 "RegRegRegOp",
3876 { "code": code,
3877 "predicate_test": predicateTest,
3878 "op_class": opClass }, [])
3879 header_output += RegRegRegOpDeclare.subst(iop)
3880 decoder_output += RegRegRegOpConstructor.subst(iop)
3881 exec_output += PredOpExecute.subst(iop)
3882
# Instantiate the table lookup for table lengths 1 through 4. The vtbl
# classes pass "true" for isVtbl (out-of-range indices produce 0); the vtbx
# classes pass "false" (out-of-range indices leave the destination byte
# unchanged).
3883 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3884 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3885 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3886 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3887
3888 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3889 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3890 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3891 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3892}};