neon.isa (9517:5ffb5e5c93b4) neon.isa (9557:8666e81607a6)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
// Greater-than comparison helper. Returns 2.0 when either operand is a
// NaN, 0.0 when op1 > op2 holds and 1.0 when it does not.
static float
vcgtFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (op1 > op2)
        return 0.0;
    return 1.0;
}
572
// Greater-or-equal comparison helper. Returns 2.0 when either operand
// is a NaN, 0.0 when op1 >= op2 holds and 1.0 when it does not.
static float
vcgeFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (op1 >= op2)
        return 0.0;
    return 1.0;
}
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
// Less-or-equal comparison helper. Returns 2.0 when either operand is
// a NaN, 0.0 when op1 <= op2 holds and 1.0 when it does not.
static float
vcleFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (op1 <= op2)
        return 0.0;
    return 1.0;
}
596
// Less-than comparison helper. Returns 2.0 when either operand is a
// NaN, 0.0 when op1 < op2 holds and 1.0 when it does not.
static float
vcltFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (op1 < op2)
        return 0.0;
    return 1.0;
}
604
// Absolute greater-than comparison helper. Returns 2.0 when either
// operand is a NaN, 0.0 when |op1| > |op2| holds and 1.0 when it does
// not.
static float
vacgtFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (fabsf(op1) > fabsf(op2))
        return 0.0;
    return 1.0;
}
612
// Absolute greater-or-equal comparison helper. Returns 2.0 when either
// operand is a NaN, 0.0 when |op1| >= |op2| holds and 1.0 when it does
// not.
static float
vacgeFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered)
        return 2.0;
    if (fabsf(op1) >= fabsf(op2))
        return 0.0;
    return 1.0;
}
620}};
621
622let {{
623
# Accumulators that the generator functions below append their emitted
# declarations and execute code to.
header_output = exec_output = ""

# Element type name groups handed to the generators as their "types"
# argument. The "small" groups stop at 32 bits; the full groups add the
# 64 bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Emit an integer instruction whose three operands each span
    rCount registers.

    The class declaration is appended to header_output; the execute
    method and one instantiation per entry of types are appended to
    exec_output. op is the C++ snippet run for every element and sees
    srcElem1, srcElem2 and destElem. With readDest the previous
    destination element is loaded into destElem before op runs. With
    pairwise the two operands are elements 2i and 2i+1 of the first
    source followed by the second source, instead of element i of each.
    """
    global header_output, exec_output
    code = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        code += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % regSubst
        if readDest:
            code += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % regSubst
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        readDestCode = ''
    # Pick the per-element loop first, then fill it in once.
    if pairwise:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    else:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    code += loopTemplate % { "op" : op, "readDest" : readDestCode }
    for idx in range(rCount):
        code += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    params = InstObjParams(name, Name,
                           "RegRegRegOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
694
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Emit a floating point instruction whose three operands each span
    rCount registers.

    The class declaration is appended to header_output; the execute
    method and one instantiation per entry of types are appended to
    exec_output. op is the C++ snippet run once per register and sees
    srcReg1, srcReg2 and destReg. With readDest the previous destination
    register is loaded into destReg before op runs. With pairwise the
    operands are registers 2r and 2r+1 of the first source followed by
    the second source. With toInt the destination is kept as raw
    FloatRegBits in a RegVect instead of as FloatReg values.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # Read the raw bits through the same _uw operand view
                # the write-back below uses; a ".bits" member access on
                # the operand does not name a valid operand type.
                eWalkCode += '''
    destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
    ''' % { "reg" : reg }
            else:
                eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    readDestCode = ''
    if readDest:
        if toInt:
            # destRegs is a RegVect here, so index its regs member just
            # as writeDest does.
            readDestCode = 'destReg = destRegs.regs[r];'
        else:
            readDestCode = 'destReg = destRegs[r];'
    if pairwise:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    else:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
    FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
    ''' % { "reg" : reg }
        else:
            eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
779
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Emit an integer three operand instruction whose operands may
    differ in width.

    Each of bigSrc1, bigSrc2 and bigDest switches the matching operand
    from a two register RegVect to a four register BigRegVect. The class
    declaration is appended to header_output; the execute method and one
    instantiation per entry of types are appended to exec_output. op is
    the C++ snippet run per element and sees srcElem1, srcElem2 and
    destElem; readDest preloads destElem from the old destination.
    """
    global header_output, exec_output
    src1Cnt, src1Prefix = (4, 'Big') if bigSrc1 else (2, '')
    src2Cnt, src2Prefix = (4, 'Big') if bigSrc2 else (2, '')
    destCnt, destPrefix = (4, 'Big') if bigDest else (2, '')
    code = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for idx in range(src1Cnt):
        code += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx }
    for idx in range(src2Cnt):
        code += '''
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : idx }
    if readDest:
        for idx in range(destCnt):
            code += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : idx }
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        readDestCode = ''
    # NOTE(review): srcElem2 is declared with src1Prefix even though
    # src2Prefix is supplied to the substitution; confirm whether that
    # is intentional before changing it, since op snippets may rely on
    # the resulting type.
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for idx in range(destCnt):
        code += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : idx }
    params = InstObjParams(name, Name,
                           "RegRegRegOp",
                           { "code": code,
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Emit a three operand instruction with both sources big and a
    regular sized destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Emit a three operand instruction with regular sized sources and
    a big destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Emit a three operand instruction with a big first source, a
    regular sized second source and a big destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an integer instruction that combines every element of the
    first source with one element of the second source, chosen by imm.

    The class declaration is appended to header_output; the execute
    method and one instantiation per entry of types are appended to
    exec_output. op is the C++ snippet run per element and sees
    srcElem1, srcElem2 (always element imm of the second source) and
    destElem; readDest preloads destElem from the old destination. The
    generated code raises an UndefinedInstruction fault instead of
    executing when imm does not index a valid element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two bounds are OR-ed: an index is invalid when it violates
    # either one. AND-ing them can never be true, which would let an
    # out-of-range imm index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
906
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Emit an integer instruction that combines every element of the
    first source with element imm of the second source and writes a
    big destination.

    Both sources span two registers and the BigRegVect destination
    spans four. The class declaration is appended to header_output; the
    execute method and one instantiation per entry of types are appended
    to exec_output. op is the C++ snippet run per element and sees
    srcElem1, srcElem2 and a BigElement destElem; readDest preloads
    destElem from the old destination. The generated code raises an
    UndefinedInstruction fault instead of executing when imm does not
    index a valid element.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The two bounds are OR-ed: an index is invalid when it violates
    # either one. AND-ing them can never be true, which would let an
    # out-of-range imm index past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
960
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a floating point instruction that combines every register
    of the first source with register imm of the second source.

    The class declaration is appended to header_output; the execute
    method and one instantiation per entry of types are appended to
    exec_output. op is the C++ snippet run once per register and sees
    srcReg1, srcReg2 (always entry imm of the second source) and
    destReg; readDest preloads destReg from the old destination. The
    generated code raises an UndefinedInstruction fault instead of
    executing when imm fails the range check against eCount.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    srcRegs2[%(reg)d] = FpOp2P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    destRegs[%(reg)d] = FpDestP%(reg)d;
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The two bounds are OR-ed: an index is invalid when it violates
    # either one. AND-ing them can never be true, which would let an
    # out-of-range imm index past the end of srcRegs2.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d = destRegs[%(reg)d];
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1012
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Emit a one source plus immediate instruction whose source and
    destination each span rCount registers.

    The class declaration is appended to header_output; the execute
    method and one instantiation per entry of types are appended to
    exec_output. op is the C++ snippet run on every loop iteration. By
    default it sees an Element srcElem1 and an Element destElem. With
    fromInt the source is presented as a FloatRegBits srcReg1 instead;
    with toInt the destination is a FloatRegBits destReg instead.
    readDest loads the old destination registers before the loop.
    """
    global header_output, exec_output
    code = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for idx in range(rCount):
        code += '''
    srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : idx }
        if readDest:
            code += '''
    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : idx }
    # The assignments below are kept in their original order; later ones
    # deliberately override earlier defaults.
    # NOTE(review): the toInt override of the destination read is taken
    # to apply whether or not readDest is set -- confirm against the
    # upstream file's indentation.
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
    if toInt:
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
    readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    declDest = 'Element destElem;'
    writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if toInt:
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    loopSubst = { "readOp" : readOpCode,
                  "declDest" : declDest,
                  "readDest" : readDestCode,
                  "op" : op,
                  "writeDest" : writeDestCode }
    code += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % loopSubst
    for idx in range(rCount):
        code += '''
    FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
    ''' % { "reg" : idx }
    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": code,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1069
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    # Emit a narrowing RegRegImmOp instruction: the source is a BigRegVect
    # filled from four registers, the destination a RegVect written back to
    # two registers.  op is C++ text that turns "srcElem1" (BigElement)
    # into "destElem" (Element).  With readDest the destination registers
    # and element are loaded before op runs.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    fragments = []
    for idx in range(4):
        fragments.append('''
    srcReg1.regs[%d] = htog(FpOp1P%d_uw);
    ''' % (idx, idx))
    loadDest = ''
    if readDest:
        for idx in range(2):
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %s
        %s
        destReg.elements[i] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(2):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1113
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    # Emit a lengthening RegRegImmOp instruction: the source is a RegVect
    # filled from two registers, the destination a BigRegVect written back
    # to four registers.  op is C++ text that turns "srcElem1" (Element)
    # into "destElem" (BigElement).
    #
    # name, Name -- mnemonic and class name handed to InstObjParams.
    # opClass    -- value stored under "op_class".
    # types      -- element types; one NeonExecDeclare is emitted per type.
    # readDest   -- load the four destination registers up front and seed
    #               destElem from them on every iteration.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element scratch value is destElem; destReg is the whole
        # register union and must not be the assignment target here.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1157
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Emit a RegRegOp instruction whose source and destination vectors both
    # span rCount registers.  op is C++ text run per element; it sees
    # "srcElem1", "destElem" and an index "j" that starts equal to the loop
    # index and selects the destination element that gets written.
    # With readDest the destination registers and element are loaded first.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    fragments = []
    for idx in range(rCount):
        fragments.append('''
    srcReg1.regs[%d] = htog(FpOp1P%d_uw);
    ''' % (idx, idx))
        if readDest:
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
    if readDest:
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDest = ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %s
        %s
        destReg.elements[j] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(rCount):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1200
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Emit a RegRegImmOp instruction in which every iteration reads the
    # same source element, the one indexed by "imm", and writes destination
    # element i.  Source and destination both span rCount registers.
    # With readDest the destination registers and element are loaded first.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    fragments = []
    for idx in range(rCount):
        fragments.append('''
    srcReg1.regs[%d] = htog(FpOp1P%d_uw);
    ''' % (idx, idx))
        if readDest:
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
    if readDest:
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDest = ''
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %s
        %s
        destReg.elements[i] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(rCount):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1242
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    # Emit a RegRegOp instruction that may rewrite BOTH of its vectors:
    # srcReg1 and destReg are each loaded from rCount registers, op runs
    # once (it is not wrapped in an element loop), and then both vectors
    # are written back to their registers.
    #
    # name, Name -- mnemonic and class name handed to InstObjParams.
    # opClass    -- value stored under "op_class".
    # types      -- element types; one NeonExecDeclare is emitted per type.
    # rCount     -- number of registers in each vector.
    # op         -- C++ text appended verbatim between load and write-back.
    # readDest   -- kept so existing callers keep working; it has no effect
    #               because the destination is always loaded.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1277
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
        readDest=False, toInt=False):
    # Emit a floating-point NEON instruction (base class FpRegRegOp) that
    # processes one whole register per iteration.  op is C++ text that sees
    # "srcReg1" (FloatReg) and "destReg".
    #
    # name, Name -- mnemonic and class name handed to InstObjParams.
    # opClass    -- value stored under "op_class".
    # types      -- element types; one NeonExecDeclare is emitted per type.
    # rCount     -- number of registers in the source and destination.
    # readDest   -- load the destination registers up front and seed
    #               destReg from them on every iteration.
    # toInt      -- destReg is FloatRegBits kept in a RegVect (accessed via
    #               .regs and the _uw operand view) instead of a FloatReg
    #               kept in a FloatVect array.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    # destSlot names the C++ array holding the destination registers and
    # destOperand the operand spelling that matches its element type, so
    # the pre-load, the loop and the write-back always agree.
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs'
        destOperand = 'FpDestP%d_uw'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        destSlot = 'destRegs'
        destOperand = 'FpDestP%d'
    for reg in range(rCount):
        eWalkCode += '''
    srcRegs1[%(reg)d] = FpOp1P%(reg)d;
    ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
    %s[%d] = %s;
    ''' % (destSlot, reg, destOperand % reg)
    readDestCode = ''
    if readDest:
        # The loop below iterates with "r"; index the same array the
        # write-back uses.
        readDestCode = 'destReg = %s[r];' % destSlot
    writeDest = '%s[r] = destReg;' % destSlot
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        eWalkCode += '''
    %s = %s[%d];
    ''' % (destOperand % reg, destSlot, reg)
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1343
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Emit a RegRegOp instruction that folds each adjacent pair of source
    # elements ("srcElem1" = element 2*i, "srcElem2" = element 2*i + 1)
    # into one BigElement "destElem", for i below eCount / 2.  The source
    # is a RegVect and the destination a BigRegVect; rCount registers are
    # read from the source and rCount written to the destination.
    # With readDest the destination registers and element are loaded first.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    fragments = []
    for idx in range(rCount):
        fragments.append('''
    srcRegs.regs[%d] = htog(FpOp1P%d_uw);
    ''' % (idx, idx))
        if readDest:
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
    if readDest:
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDest = ''
    fragments.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %s
        %s
        destReg.elements[i] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(rCount):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1387
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    # Emit a narrowing RegRegOp instruction: the source is a BigRegVect
    # filled from four registers, the destination a RegVect written back to
    # two registers.  op is C++ text that turns "srcElem1" (BigElement)
    # into "destElem" (Element).  With readDest the destination registers
    # and element are loaded before op runs.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    fragments = []
    for idx in range(4):
        fragments.append('''
    srcReg1.regs[%d] = htog(FpOp1P%d_uw);
    ''' % (idx, idx))
    loadDest = ''
    if readDest:
        for idx in range(2):
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %s
        %s
        destReg.elements[i] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(2):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": 2,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1431
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Emit a RegImmOp instruction that has only a destination vector of
    # rCount registers.  op is C++ text run per element that sets
    # "destElem".  With readDest the destination registers and element are
    # loaded before op runs.
    global header_output, exec_output

    prologue = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    fragments = []
    loadDest = ''
    if readDest:
        for idx in range(rCount):
            fragments.append('''
    destReg.regs[%d] = htog(FpDestP%d_uw);
    ''' % (idx, idx))
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %s
        %s
        destReg.elements[i] = htog(destElem);
    }
    ''' % (loadDest, op))
    for idx in range(rCount):
        fragments.append('''
    FpDestP%d_uw = gtoh(destReg.regs[%d]);
    ''' % (idx, idx))

    params = InstObjParams(name, Name,
                           "RegImmOp",
                           { "code": prologue + "".join(fragments),
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1469
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    # Emit a lengthening RegRegOp instruction: the source is a RegVect
    # filled from two registers, the destination a BigRegVect written back
    # to four registers.  op is C++ text that turns "srcElem1" (Element)
    # into "destElem" (BigElement).
    #
    # name, Name -- mnemonic and class name handed to InstObjParams.
    # opClass    -- value stored under "op_class".
    # types      -- element types; one NeonExecDeclare is emitted per type.
    # readDest   -- load the four destination registers up front and seed
    #               destElem from them on every iteration.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
    ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
    ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element scratch value is destElem; destReg is the whole
        # register union and must not be the assignment target here.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1513
# Halving add: the two operands are halved separately (low bit masked
# off) and the carry produced by their low bits is added back in.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + dqSuffix, "SimdAddOp", allTypes,
                      dqRegs, vhaddCode)

# Rounding halving add: same as above with an extra 1 folded into the
# low-bit carry.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + dqSuffix, "SimdAddOp", allTypes,
                      dqRegs, vrhaddCode)

# Halving subtract: difference of the halves minus the borrow produced by
# the low bits.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + dqSuffix, "SimdAddOp", allTypes,
                      dqRegs, vhsubCode)
1551
# Element-wise bitwise operations.  The snippets are defined first and the
# instructions are then emitted from a table, in the order
# vand, vbic, vorr, vmov, vorn, veor, vbif, vbit, vbsl with the D form
# before the Q form.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
# The three snippets below also read the previous destination value.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''

# (mnemonic, class-name stem, op class, snippet, extra positional args).
# vmov reuses the vorr snippet under its own op class.
for bwMnem, bwStem, bwOpClass, bwCode, bwExtra in (
        ("vand", "Vand", "SimdAluOp", vandCode, ()),
        ("vbic", "Vbic", "SimdAluOp", vbicCode, ()),
        ("vorr", "Vorr", "SimdAluOp", vorrCode, ()),
        ("vmov", "Vmov", "SimdMiscOp", vorrCode, ()),
        ("vorn", "Vorn", "SimdAluOp", vornCode, ()),
        ("veor", "Veor", "SimdAluOp", veorCode, ()),
        ("vbif", "Vbif", "SimdAluOp", vbifCode, (True,)),
        ("vbit", "Vbit", "SimdAluOp", vbitCode, (True,)),
        ("vbsl", "Vbsl", "SimdAluOp", vbslCode, (True,))):
    for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
        threeEqualRegInst(bwMnem, bwStem + dqSuffix, bwOpClass,
                          unsignedTypes, dqRegs, bwCode, *bwExtra)
1600
# Element-wise maximum: pick srcElem1 when it is strictly greater.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + dqSuffix, "SimdCmpOp", allTypes,
                      dqRegs, vmaxCode)

# Element-wise minimum: pick srcElem1 when it is strictly smaller.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + dqSuffix, "SimdCmpOp", allTypes,
                      dqRegs, vminCode)
1612
# Plain element-wise add; the same snippet also drives the pairwise vpadd.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vadd", "NVadd" + dqSuffix, "SimdAddOp",
                      unsignedTypes, dqRegs, vaddCode)

threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)

# Add with both operands widened to BigElement first; shared by the long
# (vaddl) and wide (vaddw) forms.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)

# Add and keep the bits above the low sizeof(Element) * 8 bits of the sum.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)

# As vaddhn, but 1 << (sizeof(Element) * 8 - 1) is added before the shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# Plain element-wise subtract.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vsub", "NVsub" + dqSuffix, "SimdAddOp",
                      unsignedTypes, dqRegs, vsubCode)

# Subtract with both operands widened to BigElement first; shared by the
# long (vsubl) and wide (vsubw) forms.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

# Unsigned saturating add: if the sum is smaller than either operand the
# result becomes all ones and the QC flag is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddU" + dqSuffix, "SimdAddOp",
                      unsignedTypes, dqRegs, vqaddUCode)

# Subtract and keep the bits above the low sizeof(Element) * 8 bits.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)

# As vsubhn, but 1 << (sizeof(Element) * 8 - 1) is added before the shift.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# Signed saturating add: overflow is flagged when both operands share a
# sign that the sum does not have.  The result is then replaced by
# 1 << (bits - 1), minus one when the wrapped sum was negative, and QC is
# set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddS" + dqSuffix, "SimdAddOp",
                      signedTypes, dqRegs, vqaddSCode)

# Unsigned saturating subtract: a difference larger than srcElem1 means
# the subtraction wrapped, so the result becomes 0 and QC is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubU" + dqSuffix, "SimdAddOp",
                      unsignedTypes, dqRegs, vqsubUCode)

# Signed saturating subtract: same scheme as the signed add, with the
# second operand's sign test inverted.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubS" + dqSuffix, "SimdAddOp",
                      signedTypes, dqRegs, vqsubSCode)
1717
# Element-wise compares: the result element is all ones when the relation
# holds and 0 otherwise.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcgt", "Vcgt" + dqSuffix, "SimdCmpOp", allTypes,
                      dqRegs, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcge", "Vcge" + dqSuffix, "SimdCmpOp", allTypes,
                      dqRegs, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vceq", "Vceq" + dqSuffix, "SimdCmpOp", unsignedTypes,
                      dqRegs, vceqCode)
1735
# Shift by a per-element register amount.  The amount is the signed low
# byte of srcElem2: negative values shift right (with the sign bits
# patched in by hand for negative inputs), other values shift left.
# Amounts of a full element width or more produce 0 before sign patching.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vshl", "Vshl" + dqSuffix, "SimdShiftOp", allTypes,
                      dqRegs, vshlCode)
1761
# Rounding shift by a per-element register amount.  For right shifts the
# last bit shifted out (rBit) is added to the result; a zero amount copies
# the source unchanged.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrshl", "Vrshl" + dqSuffix, "SimdAluOp", allTypes,
                      dqRegs, vrshlCode)
1795
# Unsigned saturating shift by a per-element register amount.  A left
# shift that would drop set bits yields the all-ones mask and sets QC;
# right shifts do not saturate; a zero amount copies the source.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlU" + dqSuffix, "SimdAluOp",
                      unsignedTypes, dqRegs, vqshlUCode)
1831
# Signed saturating shift by a per-element register amount.  A left shift
# saturates when the bits from the sign bit down through the shifted-out
# range are not all copies of the sign; the saturated value is
# mask(bits - 1), inverted for negative inputs, and QC is set.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqshl", "VqshlS" + dqSuffix, "SimdCmpOp",
                      signedTypes, dqRegs, vqshlSCode)
1877
# Unsigned saturating rounding shift by a per-element register amount.
# Right shifts add the last bit shifted out (rBit); a left shift that
# would drop set bits yields the all-ones mask and sets QC.
#
# A zero shift amount is handled on its own branch.  Sending it through
# the left-shift path would evaluate
# bits(srcElem1, size - 1, size - shiftAmt) with the low index equal to
# the element width.
# NOTE(review): bits() is defined outside this file; if it shifts by the
# low index, that is an undefined full-width shift for 32- and 64-bit
# elements -- confirm.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# Signed saturating rounding shift by a per-element register amount.
# Right shifts patch in the sign bits by hand and add the last bit shifted
# out (rBit); left shifts saturate to mask(bits - 1), inverted for
# negative inputs, and set QC; a zero amount copies the source.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqrshl", "VqrshlS" + dqSuffix, "SimdCmpOp",
                      signedTypes, dqRegs, vqrshlSCode)
1967
# Absolute difference, accumulated into the existing destination value.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vaba", "Vaba" + dqSuffix, "SimdAddAccOp", allTypes,
                      dqRegs, vabaCode, True)

# Long form: the difference is computed on operands widened to BigElement.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

# Absolute difference without accumulation.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vabd", "Vabd" + dqSuffix, "SimdAddOp", allTypes,
                      dqRegs, vabdCode)

vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)

# Bit test: all ones when the operands share any set bit, else 0.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vtst", "Vtst" + dqSuffix, "SimdAluOp", unsignedTypes,
                      dqRegs, vtstCode)
1999
2000 vmulCode = '''
2001 destElem = srcElem1 * srcElem2;
2002 '''
2003 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2004 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2005 vmullCode = '''
2006 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2007 '''
2008 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# VMLA (integer): multiply the two source elements and add the product to
# the existing destElem.
2010 vmlaCode = '''
2011 destElem = destElem + srcElem1 * srcElem2;
2012 '''
2013 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2014 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# VMLAL: same accumulation with the operands cast to BigElement first.
2015 vmlalCode = '''
2016 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2017 '''
2018 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2019
# VQDMLAL: saturating doubling multiply-accumulate long.
#  1. midElem = 2 * srcElem1 * srcElem2, evaluated in int64_t.
#  2. For the operand pairs listed in the if, midElem is replaced by
#     ~(maxNeg << element-bits) and FPSCR.QC is set.
#  3. midElem is added to destElem; when destElem and midElem had the same
#     sign but the sum's sign differs, the sum is clamped to
#     mask(BigElement-bits - 1) (or its complement if the old destElem was
#     negative) and QC is set.
# NOTE(review): 2 * halfNeg * maxNeg looks representable if BigElement is
# twice the width of Element, so the two halfNeg conditions may saturate
# unnecessarily -- confirm against the architecture pseudocode.
2020 vqdmlalCode = '''
2021 FPSCR fpscr = (FPSCR) FpscrQc;
2022 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2023 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2024 Element halfNeg = maxNeg / 2;
2025 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2026 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2027 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2028 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2029 fpscr.qc = 1;
2030 }
2031 bool negPreDest = ltz(destElem);
2032 destElem += midElem;
2033 bool negDest = ltz(destElem);
2034 bool negMid = ltz(midElem);
2035 if (negPreDest == negMid && negMid != negDest) {
2036 destElem = mask(sizeof(BigElement) * 8 - 1);
2037 if (negPreDest)
2038 destElem = ~destElem;
2039 fpscr.qc = 1;
2040 }
2041 FpscrQc = fpscr;
2042 '''
2043 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2044
# VQDMLSL: saturating doubling multiply-subtract long.
#  1. midElem = 2 * srcElem1 * srcElem2, evaluated in int64_t.
#  2. For the operand pairs listed in the if, midElem is replaced by
#     ~(maxNeg << element-bits) and FPSCR.QC is set.
#  3. midElem is subtracted from destElem; overflow is detected by comparing
#     the sign of the old destElem, the sign of -midElem (posMid) and the
#     sign of the result, and on overflow the result is clamped and QC set.
# NOTE(review): 2 * halfNeg * maxNeg looks representable if BigElement is
# twice the width of Element, so the two halfNeg conditions may saturate
# unnecessarily -- confirm against the architecture pseudocode.
2045 vqdmlslCode = '''
2046 FPSCR fpscr = (FPSCR) FpscrQc;
2047 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2048 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2049 Element halfNeg = maxNeg / 2;
2050 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2051 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2052 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2053 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2054 fpscr.qc = 1;
2055 }
2056 bool negPreDest = ltz(destElem);
2057 destElem -= midElem;
2058 bool negDest = ltz(destElem);
2059 bool posMid = ltz((BigElement)-midElem);
2060 if (negPreDest == posMid && posMid != negDest) {
2061 destElem = mask(sizeof(BigElement) * 8 - 1);
2062 if (negPreDest)
2063 destElem = ~destElem;
2064 fpscr.qc = 1;
2065 }
2066 FpscrQc = fpscr;
2067 '''
2068 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2069
# VQDMULL: saturating doubling multiply long. destElem = 2 * srcElem1 *
# srcElem2 (evaluated in int64_t). When both operands equal the most
# negative Element value, destElem is instead set to
# ~(srcElem1 << element-bits) and FPSCR.QC is set.
# NOTE(review): this registration uses the "SimdMultAccOp" op class although
# the snippet does not read destElem -- confirm that is intended.
2070 vqdmullCode = '''
2071 FPSCR fpscr = (FPSCR) FpscrQc;
2072 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2073 if (srcElem1 == srcElem2 &&
2074 srcElem1 == (Element)((Element)1 <<
2075 (Element)(sizeof(Element) * 8 - 1))) {
2076 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2077 fpscr.qc = 1;
2078 }
2079 FpscrQc = fpscr;
2080 '''
2081 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2082
# VMLS (integer): subtract the product of the two source elements from the
# existing destElem.
2083 vmlsCode = '''
2084 destElem = destElem - srcElem1 * srcElem2;
2085 '''
2086 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2087 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# VMLSL: same operation with the operands cast to BigElement first.
2088 vmlslCode = '''
2089 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2090 '''
2091 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2092
# VMUL (polynomial): carry-less multiply. For every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into destElem.
2093 vmulpCode = '''
2094 destElem = 0;
2095 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2096 if (bits(srcElem2, j))
2097 destElem ^= srcElem1 << j;
2098 }
2099 '''
2100 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2101 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# VMULL (polynomial): same loop, with srcElem1 cast to BigElement before the
# shift so the shifted-out bits are kept.
2102 vmullpCode = '''
2103 destElem = 0;
2104 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2105 if (bits(srcElem2, j))
2106 destElem ^= (BigElement)srcElem1 << j;
2107 }
2108 '''
2109 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
# Pairwise integer max/min: reuse the vmaxCode / vminCode snippets defined
# earlier in the file, with pairwise=True passed to the generator.
2111 threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2112
2113 threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2114
# VQDMULH: saturating doubling multiply returning the high half.
# destElem = (2 * srcElem1 * srcElem2) >> element-bits, evaluated in int64_t.
# When both operands equal the most negative Element value, destElem is set
# to ~srcElem1 and FPSCR.QC is set.
2115 vqdmulhCode = '''
2116 FPSCR fpscr = (FPSCR) FpscrQc;
2117 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118 (sizeof(Element) * 8);
2119 if (srcElem1 == srcElem2 &&
2120 srcElem1 == (Element)((Element)1 <<
2121 (sizeof(Element) * 8 - 1))) {
2122 destElem = ~srcElem1;
2123 fpscr.qc = 1;
2124 }
2125 FpscrQc = fpscr;
2126 '''
2127 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2128 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2129
# VQRDMULH: rounding variant of the saturating doubling multiply-high.
# A rounding constant of 1 << (element-bits - 1) is added to the doubled
# product before it is shifted right by element-bits. For the operand pairs
# listed in the if, the result is replaced by mask(element-bits - 1) when the
# computed destElem is negative, otherwise by 1 << (element-bits - 1), and
# FPSCR.QC is set.
# NOTE(review): the halfNeg * maxNeg pairs appear to produce a result that
# fits in Element, yet they are forced to a saturation value here -- confirm
# against the architecture pseudocode.
2130 vqrdmulhCode = '''
2131 FPSCR fpscr = (FPSCR) FpscrQc;
2132 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2133 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2134 (sizeof(Element) * 8);
2135 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2136 Element halfNeg = maxNeg / 2;
2137 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2138 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2139 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2140 if (destElem < 0) {
2141 destElem = mask(sizeof(Element) * 8 - 1);
2142 } else {
2143 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2144 }
2145 fpscr.qc = 1;
2146 }
2147 FpscrQc = fpscr;
2148 '''
2149 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2150 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2151 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2152 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2153
# VMAX (float): processNans() gets the first chance to produce the result.
# If it reports done == false, the result is computed by binaryOp() with
# fpMaxS. If it reports done == true and flushToZero() returns true for the
# sources, FPSCR.IDC is set. The updated fpscr is written back to FpscrExc.
2154 vmaxfpCode = '''
2155 FPSCR fpscr = (FPSCR) FpscrExc;
2156 bool done;
2157 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2158 if (!done) {
2159 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2160 true, true, VfpRoundNearest);
2161 } else if (flushToZero(srcReg1, srcReg2)) {
2162 fpscr.idc = 1;
2163 }
2164 FpscrExc = fpscr;
2165 '''
2166 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2167 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2168
# VMIN (float): processNans() gets the first chance to produce the result.
# If it reports done == false, the result is computed by binaryOp() with
# fpMinS. If it reports done == true and flushToZero() returns true for the
# sources, FPSCR.IDC is set. The updated fpscr is written back to FpscrExc.
2169 vminfpCode = '''
2170 FPSCR fpscr = (FPSCR) FpscrExc;
2171 bool done;
2172 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2173 if (!done) {
2174 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2175 true, true, VfpRoundNearest);
2176 } else if (flushToZero(srcReg1, srcReg2)) {
2177 fpscr.idc = 1;
2178 }
2179 FpscrExc = fpscr;
2180 '''
2181 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2182 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2183
# Pairwise float max/min: same vmaxfpCode / vminfpCode snippets as the
# element-wise forms, registered with pairwise=True.
2184 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2185 2, vmaxfpCode, pairwise=True)
2186 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2187 4, vmaxfpCode, pairwise=True)
2188
2189 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2190 2, vminfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2192 4, vminfpCode, pairwise=True)
2193
# VADD (float): destReg = binaryOp(..., fpAddS, ...) using VfpRoundNearest;
# the fpscr updated by binaryOp is written back to FpscrExc.
2194 vaddfpCode = '''
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2197 true, true, VfpRoundNearest);
2198 FpscrExc = fpscr;
2199 '''
2200 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2201 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2202
# VPADD (float): the same snippet registered with pairwise=True.
2203 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2204 2, vaddfpCode, pairwise=True)
2205 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2206 4, vaddfpCode, pairwise=True)
2207
# VSUB (float): destReg = binaryOp(..., fpSubS, ...) using VfpRoundNearest;
# the fpscr updated by binaryOp is written back to FpscrExc.
2208 vsubfpCode = '''
2209 FPSCR fpscr = (FPSCR) FpscrExc;
2210 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2211 true, true, VfpRoundNearest);
2212 FpscrExc = fpscr;
2213 '''
2214 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2215 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2216
# VMUL (float): destReg = binaryOp(..., fpMulS, ...) using VfpRoundNearest;
# the fpscr updated by binaryOp is written back to FpscrExc.
2217 vmulfpCode = '''
2218 FPSCR fpscr = (FPSCR) FpscrExc;
2219 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2220 true, true, VfpRoundNearest);
2221 FpscrExc = fpscr;
2222 '''
2223 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2224 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2225
# VMLA (float): two separate binaryOp steps -- first the product of the
# sources (fpMulS) into mid, then mid + destReg (fpAddS). Both steps share
# the same fpscr, which is written back to FpscrExc at the end.
2226 vmlafpCode = '''
2227 FPSCR fpscr = (FPSCR) FpscrExc;
2228 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2229 true, true, VfpRoundNearest);
2230 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2231 true, true, VfpRoundNearest);
2232 FpscrExc = fpscr;
2233 '''
2234 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2235 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2236
# VMLS (float): two separate binaryOp steps -- first the product of the
# sources (fpMulS) into mid, then destReg - mid (fpSubS). Both steps share
# the same fpscr, which is written back to FpscrExc at the end.
2237 vmlsfpCode = '''
2238 FPSCR fpscr = (FPSCR) FpscrExc;
2239 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2240 true, true, VfpRoundNearest);
2241 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2242 true, true, VfpRoundNearest);
2243 FpscrExc = fpscr;
2244 '''
2245 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2246 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2247
# VCGT (float): vcgtFunc is evaluated through binaryOp. destReg is -1 when
# the returned value equals 0 and 0 otherwise; a returned value of 2.0 sets
# FPSCR.IOC.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgtFunc, which is
# defined outside this block -- confirm there.
2248 vcgtfpCode = '''
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2255 FpscrExc = fpscr;
2256 '''
2257 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgtfpCode, toInt = True)
2259 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgtfpCode, toInt = True)
2261
# VCGE (float): vcgeFunc is evaluated through binaryOp. destReg is -1 when
# the returned value equals 0 and 0 otherwise; a returned value of 2.0 sets
# FPSCR.IOC.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgeFunc, which is
# defined outside this block -- confirm there.
2262 vcgefpCode = '''
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2269 FpscrExc = fpscr;
2270 '''
2271 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2272 2, vcgefpCode, toInt = True)
2273 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2274 4, vcgefpCode, toInt = True)
2275
# VACGT (float): vacgtFunc is evaluated through binaryOp. destReg is -1 when
# the returned value equals 0 and 0 otherwise; a returned value of 2.0 sets
# FPSCR.IOC.
# NOTE(review): the 0 / 2.0 return convention belongs to vacgtFunc, which is
# defined outside this block -- confirm there.
2276 vacgtfpCode = '''
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2283 FpscrExc = fpscr;
2284 '''
2285 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgtfpCode, toInt = True)
2287 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgtfpCode, toInt = True)
2289
# VACGE (float): vacgeFunc is evaluated through binaryOp. destReg is -1 when
# the returned value equals 0 and 0 otherwise; a returned value of 2.0 sets
# FPSCR.IOC.
# NOTE(review): the 0 / 2.0 return convention belongs to vacgeFunc, which is
# defined outside this block -- confirm there.
2290 vacgefpCode = '''
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2297 FpscrExc = fpscr;
2298 '''
2299 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2300 2, vacgefpCode, toInt = True)
2301 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2302 4, vacgefpCode, toInt = True)
2303
# VCEQ (float): vceqFunc is evaluated through binaryOp. destReg is -1 when
# the returned value equals 0 and 0 otherwise; a returned value of 2.0 sets
# FPSCR.IOC.
# NOTE(review): the 0 / 2.0 return convention belongs to vceqFunc, which is
# defined outside this block -- confirm there.
2304 vceqfpCode = '''
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2307 true, true, VfpRoundNearest);
2308 destReg = (res == 0) ? -1 : 0;
2309 if (res == 2.0)
2310 fpscr.ioc = 1;
2311 FpscrExc = fpscr;
2312 '''
2313 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2314 2, vceqfpCode, toInt = True)
2315 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2316 4, vceqfpCode, toInt = True)
2317
# VRECPS: destReg = binaryOp(..., fpRecpsS, ...) using VfpRoundNearest; the
# fpscr updated by binaryOp is written back to FpscrExc.
2318 vrecpsCode = '''
2319 FPSCR fpscr = (FPSCR) FpscrExc;
2320 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2321 true, true, VfpRoundNearest);
2322 FpscrExc = fpscr;
2323 '''
2324 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2325 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2326
# VRSQRTS: destReg = binaryOp(..., fpRSqrtsS, ...) using VfpRoundNearest; the
# fpscr updated by binaryOp is written back to FpscrExc.
2327 vrsqrtsCode = '''
2328 FPSCR fpscr = (FPSCR) FpscrExc;
2329 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2330 true, true, VfpRoundNearest);
2331 FpscrExc = fpscr;
2332 '''
2333 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2334 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2335
# VABD (float): subtract the sources through binaryOp (fpSubS), then take
# fabs() of the difference. The fpscr updated by binaryOp is written back to
# FpscrExc.
2336 vabdfpCode = '''
2337 FPSCR fpscr = (FPSCR) FpscrExc;
2338 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2339 true, true, VfpRoundNearest);
2340 destReg = fabs(mid);
2341 FpscrExc = fpscr;
2342 '''
2343 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2344 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2345
# Second set of multiply-family registrations (class names carry an extra
# "s"). They go through the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst generators and reuse the element snippets assigned earlier
# in the file (vmlaCode, vmlsCode, vmulCode, vqdmullCode, ...).
# NOTE(review): vmla is registered over unsignedTypes while vmls and vmul use
# allTypes -- confirm the difference is intended.
2346 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2347 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2348 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2349 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2350 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2351
2352 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2353 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2354 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2355 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2356 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2357
2358 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2359 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2360 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2361 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2362 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2363
# NOTE(review): Vqdmulls uses op class "SimdMultOp" here, while the
# threeRegLongInst registration of the same snippet uses "SimdMultAccOp" --
# confirm which one is intended.
2364 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2365 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2366 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2367 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2368 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2369 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2370 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2371 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2372 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2373
# VSHR: shift srcElem1 right by imm. A shift of the full element width or
# more yields -1 for a negative source and 0 otherwise, avoiding an
# out-of-range C++ shift.
2374 vshrCode = '''
2375 if (imm >= sizeof(srcElem1) * 8) {
2376 if (ltz(srcElem1))
2377 destElem = -1;
2378 else
2379 destElem = 0;
2380 } else {
2381 destElem = srcElem1 >> imm;
2382 }
2383 '''
2384 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2385 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
# VSRA: shift srcElem1 right by imm and add the shifted value to destElem.
# A shift of the full element width or more contributes -1 for a negative
# source and 0 otherwise.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        // If the source was negative but the shifted value is not, the
        // shift did not carry the sign down: extend it by hand from the
        // bit position where the source's top bit landed.
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2402
# VRSHR: rounding shift right. The rounding bit is bit (imm - 1) of the
# source and is added after the shift; the shift is split into
# (imm - 1) followed by 1 so that imm equal to the element width stays a
# valid C++ shift. imm greater than the element width yields 0, and imm == 0
# copies the source.
2403 vrshrCode = '''
2404 if (imm > sizeof(srcElem1) * 8) {
2405 destElem = 0;
2406 } else if (imm) {
2407 Element rBit = bits(srcElem1, imm - 1);
2408 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2409 } else {
2410 destElem = srcElem1;
2411 }
2412 '''
2413 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2414 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2415
# VRSRA: rounding shift right and accumulate. Same three cases as the
# rounding shift (imm beyond the width, non-zero imm, imm == 0), but each
# result is added to destElem instead of replacing it.
2416 vrsraCode = '''
2417 if (imm > sizeof(srcElem1) * 8) {
2418 destElem += 0;
2419 } else if (imm) {
2420 Element rBit = bits(srcElem1, imm - 1);
2421 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2422 } else {
2423 destElem += srcElem1;
2424 }
2425 '''
2426 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2427 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2428
# VSRI: shift right and insert. The shifted source fills the low
# (width - imm) bits; the top imm bits of destElem are preserved. With imm
# at or beyond the element width destElem is left unchanged.
2429 vsriCode = '''
2430 if (imm >= sizeof(Element) * 8)
2431 destElem = destElem;
2432 else
2433 destElem = (srcElem1 >> imm) |
2434 (destElem & ~mask(sizeof(Element) * 8 - imm));
2435 '''
2436 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2437 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2438
# VSHL (immediate): shift left by imm. For imm at or beyond the element
# width the shift is performed as (width - 1) followed by 1, so no single
# C++ shift reaches the full width.
2439 vshlCode = '''
2440 if (imm >= sizeof(Element) * 8)
2441 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2442 else
2443 destElem = srcElem1 << imm;
2444 '''
2445 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2446 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2447
# VSLI: shift left and insert. The shifted source supplies the upper bits;
# the low imm bits of destElem are preserved. With imm at or beyond the
# element width destElem is left unchanged.
2448 vsliCode = '''
2449 if (imm >= sizeof(Element) * 8)
2450 destElem = destElem;
2451 else
2452 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2453 '''
2454 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2455 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2456
# VQSHL (immediate, signed types): saturating shift left.
#  - imm >= element width: any non-zero source saturates (positive sources
#    to the complement of the sign bit, others to the sign bit alone) and
#    sets FPSCR.QC; a zero source gives 0.
#  - 0 < imm < width: the top (imm + 1) bits of the source must be all zero
#    or all one, otherwise the result saturates the same way and QC is set.
#  - imm == 0: the source is copied.
2457 vqshlCode = '''
2458 FPSCR fpscr = (FPSCR) FpscrQc;
2459 if (imm >= sizeof(Element) * 8) {
2460 if (srcElem1 != 0) {
2461 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2462 if (srcElem1 > 0)
2463 destElem = ~destElem;
2464 fpscr.qc = 1;
2465 } else {
2466 destElem = 0;
2467 }
2468 } else if (imm) {
2469 destElem = (srcElem1 << imm);
2470 uint64_t topBits = bits((uint64_t)srcElem1,
2471 sizeof(Element) * 8 - 1,
2472 sizeof(Element) * 8 - 1 - imm);
2473 if (topBits != 0 && topBits != mask(imm + 1)) {
2474 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2475 if (srcElem1 > 0)
2476 destElem = ~destElem;
2477 fpscr.qc = 1;
2478 }
2479 } else {
2480 destElem = srcElem1;
2481 }
2482 FpscrQc = fpscr;
2483 '''
2484 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2485 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2486
# VQSHLU registered over unsignedTypes: saturating shift left.
#  - imm >= element width: a non-zero source saturates to
#    mask(element-bits) and sets FPSCR.QC; zero stays 0.
#  - 0 < imm < width: if any of the top imm bits of the source is set, the
#    result saturates to mask(element-bits) and QC is set.
#  - imm == 0: the source is copied.
2487 vqshluCode = '''
2488 FPSCR fpscr = (FPSCR) FpscrQc;
2489 if (imm >= sizeof(Element) * 8) {
2490 if (srcElem1 != 0) {
2491 destElem = mask(sizeof(Element) * 8);
2492 fpscr.qc = 1;
2493 } else {
2494 destElem = 0;
2495 }
2496 } else if (imm) {
2497 destElem = (srcElem1 << imm);
2498 uint64_t topBits = bits((uint64_t)srcElem1,
2499 sizeof(Element) * 8 - 1,
2500 sizeof(Element) * 8 - imm);
2501 if (topBits != 0) {
2502 destElem = mask(sizeof(Element) * 8);
2503 fpscr.qc = 1;
2504 }
2505 } else {
2506 destElem = srcElem1;
2507 }
2508 FpscrQc = fpscr;
2509 '''
2510 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2511 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2512
# VQSHLU registered over signedTypes: saturating shift left where negative
# sources clamp to 0.
#  - In every imm case a negative source produces 0 and sets FPSCR.QC.
#  - imm >= element width: a positive source saturates to
#    mask(element-bits) with QC set; zero stays 0.
#  - 0 < imm < width: a non-negative source whose top imm bits are not all
#    zero saturates to mask(element-bits) with QC set.
#  - imm == 0: a non-negative source is copied.
2513 vqshlusCode = '''
2514 FPSCR fpscr = (FPSCR) FpscrQc;
2515 if (imm >= sizeof(Element) * 8) {
2516 if (srcElem1 < 0) {
2517 destElem = 0;
2518 fpscr.qc = 1;
2519 } else if (srcElem1 > 0) {
2520 destElem = mask(sizeof(Element) * 8);
2521 fpscr.qc = 1;
2522 } else {
2523 destElem = 0;
2524 }
2525 } else if (imm) {
2526 destElem = (srcElem1 << imm);
2527 uint64_t topBits = bits((uint64_t)srcElem1,
2528 sizeof(Element) * 8 - 1,
2529 sizeof(Element) * 8 - imm);
2530 if (srcElem1 < 0) {
2531 destElem = 0;
2532 fpscr.qc = 1;
2533 } else if (topBits != 0) {
2534 destElem = mask(sizeof(Element) * 8);
2535 fpscr.qc = 1;
2536 }
2537 } else {
2538 if (srcElem1 < 0) {
2539 fpscr.qc = 1;
2540 destElem = 0;
2541 } else {
2542 destElem = srcElem1;
2543 }
2544 }
2545 FpscrQc = fpscr;
2546 '''
2547 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2548 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2549
# VSHRN: shift the source right by imm and store it in the destination
# element; imm at or beyond the source width yields 0.
2550 vshrnCode = '''
2551 if (imm >= sizeof(srcElem1) * 8) {
2552 destElem = 0;
2553 } else {
2554 destElem = srcElem1 >> imm;
2555 }
2556 '''
2557 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2558
# VRSHRN: rounding shift right and narrow. Bit (imm - 1) of the source is
# added after the shift as the rounding increment; imm greater than the
# source width yields 0 and imm == 0 copies the source.
2559 vrshrnCode = '''
2560 if (imm > sizeof(srcElem1) * 8) {
2561 destElem = 0;
2562 } else if (imm) {
2563 Element rBit = bits(srcElem1, imm - 1);
2564 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2565 } else {
2566 destElem = srcElem1;
2567 }
2568 '''
2569 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2570
# VQSHRN (signed types): saturating shift right and narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set unless the
#    source was 0 or -1.
#  - non-zero imm: the source is shifted into mid, the sign is re-extended by
#    hand from bit (BigElement-bits - 1 - imm), and if mid does not survive a
#    round trip through Element the result saturates to
#    mask(element-bits - 1) (complemented for a negative source) with QC set.
#  - imm == 0: the source is copied.
2571 vqshrnCode = '''
2572 FPSCR fpscr = (FPSCR) FpscrQc;
2573 if (imm > sizeof(srcElem1) * 8) {
2574 if (srcElem1 != 0 && srcElem1 != -1)
2575 fpscr.qc = 1;
2576 destElem = 0;
2577 } else if (imm) {
2578 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2579 mid |= -(mid & ((BigElement)1 <<
2580 (sizeof(BigElement) * 8 - 1 - imm)));
2581 if (mid != (Element)mid) {
2582 destElem = mask(sizeof(Element) * 8 - 1);
2583 if (srcElem1 < 0)
2584 destElem = ~destElem;
2585 fpscr.qc = 1;
2586 } else {
2587 destElem = mid;
2588 }
2589 } else {
2590 destElem = srcElem1;
2591 }
2592 FpscrQc = fpscr;
2593 '''
2594 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2595
# VQSHRUN registered over smallUnsignedTypes: saturating shift right and
# narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set for a
#    non-zero source.
#  - non-zero imm: if the shifted value does not survive a round trip through
#    Element, the result saturates to mask(element-bits) with QC set.
#  - imm == 0: the source is copied.
2596 vqshrunCode = '''
2597 FPSCR fpscr = (FPSCR) FpscrQc;
2598 if (imm > sizeof(srcElem1) * 8) {
2599 if (srcElem1 != 0)
2600 fpscr.qc = 1;
2601 destElem = 0;
2602 } else if (imm) {
2603 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2604 if (mid != (Element)mid) {
2605 destElem = mask(sizeof(Element) * 8);
2606 fpscr.qc = 1;
2607 } else {
2608 destElem = mid;
2609 }
2610 } else {
2611 destElem = srcElem1;
2612 }
2613 FpscrQc = fpscr;
2614 '''
2615 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2616 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2617
# VQSHRUN registered over smallSignedTypes: saturating shift right and
# narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set for a
#    non-zero source.
#  - non-zero imm: if any bit of the shifted value above the low
#    element-bits is set, the result saturates (0 for a negative source,
#    mask(element-bits) otherwise) with QC set.
#  - imm == 0: the source is copied.
2618 vqshrunsCode = '''
2619 FPSCR fpscr = (FPSCR) FpscrQc;
2620 if (imm > sizeof(srcElem1) * 8) {
2621 if (srcElem1 != 0)
2622 fpscr.qc = 1;
2623 destElem = 0;
2624 } else if (imm) {
2625 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2626 if (bits(mid, sizeof(BigElement) * 8 - 1,
2627 sizeof(Element) * 8) != 0) {
2628 if (srcElem1 < 0) {
2629 destElem = 0;
2630 } else {
2631 destElem = mask(sizeof(Element) * 8);
2632 }
2633 fpscr.qc = 1;
2634 } else {
2635 destElem = mid;
2636 }
2637 } else {
2638 destElem = srcElem1;
2639 }
2640 FpscrQc = fpscr;
2641 '''
2642 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2643 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2644
# VQRSHRN (signed types): saturating rounding shift right and narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set unless the
#    source was 0 or -1.
#  - non-zero imm: the bit just below the shift point is kept as rBit, the
#    shifted value is sign-extended by hand, rBit is added, and if the sum
#    does not survive a round trip through Element the result saturates to
#    mask(element-bits - 1) (complemented for a negative source) with QC set.
#  - imm == 0: the source itself is range-checked and saturated the same way.
2645 vqrshrnCode = '''
2646 FPSCR fpscr = (FPSCR) FpscrQc;
2647 if (imm > sizeof(srcElem1) * 8) {
2648 if (srcElem1 != 0 && srcElem1 != -1)
2649 fpscr.qc = 1;
2650 destElem = 0;
2651 } else if (imm) {
2652 BigElement mid = (srcElem1 >> (imm - 1));
2653 uint64_t rBit = mid & 0x1;
2654 mid >>= 1;
2655 mid |= -(mid & ((BigElement)1 <<
2656 (sizeof(BigElement) * 8 - 1 - imm)));
2657 mid += rBit;
2658 if (mid != (Element)mid) {
2659 destElem = mask(sizeof(Element) * 8 - 1);
2660 if (srcElem1 < 0)
2661 destElem = ~destElem;
2662 fpscr.qc = 1;
2663 } else {
2664 destElem = mid;
2665 }
2666 } else {
2667 if (srcElem1 != (Element)srcElem1) {
2668 destElem = mask(sizeof(Element) * 8 - 1);
2669 if (srcElem1 < 0)
2670 destElem = ~destElem;
2671 fpscr.qc = 1;
2672 } else {
2673 destElem = srcElem1;
2674 }
2675 }
2676 FpscrQc = fpscr;
2677 '''
2678 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2679 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2680
# VQRSHRUN registered over smallUnsignedTypes: saturating rounding shift
# right and narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set for a
#    non-zero source.
#  - non-zero imm: the bit just below the shift point is added back as the
#    rounding increment; a value that does not survive a round trip through
#    Element saturates to mask(element-bits) with QC set.
#  - imm == 0: the source itself is range-checked and saturated the same way.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Saturate to the all-ones element, exactly as the shifted
            // path does. The previous mask(bits - 1) was the signed
            // maximum and left the top bit of the result clear.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2710
# VQRSHRUN registered over smallSignedTypes: saturating rounding shift right
# and narrow.
#  - imm greater than the source width: result 0; FPSCR.QC is set for a
#    non-zero source.
#  - non-zero imm: the shifted value is sign-extended by hand, the rounding
#    bit is added, and if any bit above the low element-bits is set the
#    result saturates (0 for a negative source, mask(element-bits) otherwise)
#    with QC set.
#  - imm == 0: a negative source gives 0 with QC set; otherwise the source is
#    copied.
2711 vqrshrunsCode = '''
2712 FPSCR fpscr = (FPSCR) FpscrQc;
2713 if (imm > sizeof(srcElem1) * 8) {
2714 if (srcElem1 != 0)
2715 fpscr.qc = 1;
2716 destElem = 0;
2717 } else if (imm) {
2718 BigElement mid = (srcElem1 >> (imm - 1));
2719 uint64_t rBit = mid & 0x1;
2720 mid >>= 1;
2721 mid |= -(mid & ((BigElement)1 <<
2722 (sizeof(BigElement) * 8 - 1 - imm)));
2723 mid += rBit;
2724 if (bits(mid, sizeof(BigElement) * 8 - 1,
2725 sizeof(Element) * 8) != 0) {
2726 if (srcElem1 < 0) {
2727 destElem = 0;
2728 } else {
2729 destElem = mask(sizeof(Element) * 8);
2730 }
2731 fpscr.qc = 1;
2732 } else {
2733 destElem = mid;
2734 }
2735 } else {
2736 if (srcElem1 < 0) {
2737 fpscr.qc = 1;
2738 destElem = 0;
2739 } else {
2740 destElem = srcElem1;
2741 }
2742 }
2743 FpscrQc = fpscr;
2744 '''
2745 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2746 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2747
# VSHLL: cast the source to BigElement and shift left by imm; imm at or
# beyond the destination width yields 0.
2748 vshllCode = '''
2749 if (imm >= sizeof(destElem) * 8) {
2750 destElem = 0;
2751 } else {
2752 destElem = (BigElement)srcElem1 << imm;
2753 }
2754 '''
2755 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2756
# VMOVL: plain assignment of the source element to the destination element;
# any widening comes from the element types chosen by twoRegLongShiftInst.
2757 vmovlCode = '''
2758 destElem = srcElem1;
2759 '''
2760 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2761
# VCVT float -> fixed point via vfpFpSToFixed(srcElem1, false, false, imm).
# FPSCR.IDC is set first when flushToZero() reports true for the source. The
# conversion runs between prepFpState() and finishVfp(), and the resulting
# fpscr is written back to FpscrExc.
# NOTE(review): the empty asm statements with "m" constraints presumably stop
# the compiler from moving the conversion across the FP-state setup and
# teardown -- confirm.
2762 vcvt2ufxCode = '''
2763 FPSCR fpscr = (FPSCR) FpscrExc;
2764 if (flushToZero(srcElem1))
2765 fpscr.idc = 1;
2766 VfpSavedState state = prepFpState(VfpRoundNearest);
2767 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2768 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2769 __asm__ __volatile__("" :: "m" (destReg));
2770 finishVfp(fpscr, state, true);
2771 FpscrExc = fpscr;
2772 '''
2773 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2774 2, vcvt2ufxCode, toInt = True)
2775 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2776 4, vcvt2ufxCode, toInt = True)
2777
# VCVT float -> fixed point via vfpFpSToFixed(srcElem1, true, false, imm).
# FPSCR.IDC is set first when flushToZero() reports true for the source. The
# conversion runs between prepFpState() and finishVfp(), and the resulting
# fpscr is written back to FpscrExc.
# NOTE(review): the empty asm statements with "m" constraints presumably stop
# the compiler from moving the conversion across the FP-state setup and
# teardown -- confirm.
2778 vcvt2sfxCode = '''
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 if (flushToZero(srcElem1))
2781 fpscr.idc = 1;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2784 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2785 __asm__ __volatile__("" :: "m" (destReg));
2786 finishVfp(fpscr, state, true);
2787 FpscrExc = fpscr;
2788 '''
2789 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2790 2, vcvt2sfxCode, toInt = True)
2791 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2792 4, vcvt2sfxCode, toInt = True)
2793
# VCVT fixed point -> float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm). The conversion runs between prepFpState() and finishVfp(), and the
# resulting fpscr is written back to FpscrExc.
# NOTE(review): the empty asm statements with "m" constraints presumably stop
# the compiler from moving the conversion across the FP-state setup and
# teardown -- confirm.
2794 vcvtu2fpCode = '''
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2801 FpscrExc = fpscr;
2802 '''
2803 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2804 2, vcvtu2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvtu2fpCode, fromInt = True)
2807
# VCVT fixed point -> float via vfpSFixedToFpS(true, true, srcReg1, false,
# imm). The conversion runs between prepFpState() and finishVfp(), and the
# resulting fpscr is written back to FpscrExc.
# NOTE(review): the empty asm statements with "m" constraints presumably stop
# the compiler from moving the conversion across the FP-state setup and
# teardown -- confirm.
2808 vcvts2fpCode = '''
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 VfpSavedState state = prepFpState(VfpRoundNearest);
2811 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2812 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
2815 FpscrExc = fpscr;
2816 '''
2817 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2818 2, vcvts2fpCode, fromInt = True)
2819 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2820 4, vcvts2fpCode, fromInt = True)
2821
2822 vcvts2hCode = '''
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
// Greater-than comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if op1 > op2, otherwise 1.0.
static float
vcgtFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered) {
        return 2.0;
    } else if (op1 > op2) {
        return 0.0;
    }
    return 1.0;
}
572
// Greater-than-or-equal comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if op1 >= op2, otherwise 1.0.
static float
vcgeFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered) {
        return 2.0;
    } else if (op1 >= op2) {
        return 0.0;
    }
    return 1.0;
}
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
// Less-than-or-equal comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if op1 <= op2, otherwise 1.0.
static float
vcleFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered) {
        return 2.0;
    } else if (op1 <= op2) {
        return 0.0;
    }
    return 1.0;
}
596
// Less-than comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if op1 < op2, otherwise 1.0.
static float
vcltFunc(float op1, float op2)
{
    const bool unordered = std::isnan(op1) || std::isnan(op2);
    if (unordered) {
        return 2.0;
    } else if (op1 < op2) {
        return 0.0;
    }
    return 1.0;
}
604
// Absolute-value greater-than comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if |op1| > |op2|,
// otherwise 1.0.
static float
vacgtFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    const float mag1 = fabsf(op1);
    const float mag2 = fabsf(op2);
    if (mag1 > mag2)
        return 0.0;
    return 1.0;
}
612
// Absolute-value greater-than-or-equal comparison helper.
// Returns 2.0 if either operand is NaN, 0.0 if |op1| >= |op2|,
// otherwise 1.0.
static float
vacgeFunc(float op1, float op2)
{
    if (std::isnan(op1) || std::isnan(op2))
        return 2.0;
    const float mag1 = fabsf(op1);
    const float mag2 = fabsf(op2);
    if (mag1 >= mag2)
        return 0.0;
    return 1.0;
}
620}};
621
622let {{
623
# Accumulators for the generated declarations and execute code; the
# instruction generator functions append to them through `global`.
header_output = exec_output = ""

# Names of the element types the instruction templates are
# instantiated with.
smallUnsignedTypes = tuple("uint%d_t" % bits for bits in (8, 16, 32))
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
smallSignedTypes = tuple("int%d_t" % bits for bits in (8, 16, 32))
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate a three-operand instruction whose two sources and
    destination each span rCount registers of the same element type.

    The emitted execute code copies the operand registers into RegVect
    variables, runs `op` once per element and copies destReg back to
    the destination registers.  `op` is C++ text that sees srcElem1 and
    srcElem2 and is expected to set destElem.

    readDest -- preload destReg/destElem from the current destination.
    pairwise -- feed `op` elements 2*i and 2*i+1 taken from srcReg1
                followed by srcReg2, instead of element i of each.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    walkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    # Load the operand registers (and the old destination if needed).
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        walkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            walkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    # Choose the per-element loop, then fill in the operation.
    if pairwise:
        elementLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        elementLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    walkCode += elementLoop % { "op" : op, "readDest" : loadDest }
    # Write the result back.
    for regIdx in range(rCount):
        walkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
694
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a three-operand instruction that operates on whole
    FloatReg values rather than integer elements.

    The sources are copied into FloatVect arrays of rCount entries and
    `op` (C++ text using srcReg1, srcReg2 and destReg) runs once per
    register.

    readDest -- preload destReg from the current destination.
    pairwise -- feed `op` registers 2*r and 2*r+1 taken from srcRegs1
                followed by srcRegs2, instead of register r of each.
    toInt    -- make the destination a RegVect of FloatRegBits that is
                written back through the _uw operands.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    walkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    walkCode += 'RegVect destRegs;\n' if toInt else 'FloatVect destRegs;\n'
    # NOTE(review): with toInt and readDest both set, the preload below
    # uses a '.bits' operand suffix and the per-register read uses
    # 'destRegs[r]', unlike the '_uw' / 'destRegs.regs[r]' forms used for
    # the write-back -- confirm this combination is intended/used.
    if toInt:
        loadDestReg = '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                '''
    else:
        loadDestReg = '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        walkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regSubst
        if readDest:
            walkCode += loadDestReg % regSubst
    readDestCode = 'destReg = destRegs[r];' if readDest else ''
    if toInt:
        destType, writeDest = 'FloatRegBits', 'destRegs.regs[r] = destReg;'
    else:
        destType, writeDest = 'FloatReg', 'destRegs[r] = destReg;'
    # Choose the per-register loop, then fill in the operation.
    if pairwise:
        registerLoop = '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        '''
    else:
        registerLoop = '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        '''
    walkCode += registerLoop % { "op" : op,
                                 "readDest" : readDestCode,
                                 "destType" : destType,
                                 "writeDest" : writeDest }
    # Write the result back.
    if toInt:
        storeDestReg = '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            '''
    else:
        storeDestReg = '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            '''
    for regIdx in range(rCount):
        walkCode += storeDestReg % { "reg" : regIdx }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": walkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
779
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate a three-operand instruction whose operands may differ
    in width.

    Each of bigSrc1, bigSrc2 and bigDest selects, for its operand, a
    BigRegVect of four registers holding BigElement values instead of a
    RegVect of two registers holding Element values.  `op` is C++ text
    that sees srcElem1 and srcElem2 and is expected to set destElem; it
    runs once per element.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 is declared with the second source's own prefix so its
    # type matches the elements of srcReg2 when only one source is wide.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with both sources wide and a normal
    width destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with both sources normal width and a
    wide destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Three-operand instruction with a wide first source, a normal
    width second source and a wide destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate an instruction that combines every element of the first
    source with one element of the second source selected by `imm`.

    Sources and destination span rCount registers of the same element
    type.  `op` is C++ text that sees srcElem1 (element i of srcReg1)
    and srcElem2 (element imm of srcReg2) and is expected to set
    destElem.  The generated code raises UndefinedInstruction when imm
    does not index a valid element.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: "imm < 0 && imm >= eCount" can never
    # be true, which would let an out-of-range imm index past the end
    # of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
906
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Generate an instruction that combines every element of the first
    source with one element of the second source selected by `imm`,
    producing a wide destination.

    The sources are RegVects of two registers; the destination is a
    BigRegVect of four registers holding BigElement values.  `op` is
    C++ text that sees srcElem1 (element i of srcReg1) and srcElem2
    (element imm of srcReg2) and is expected to set destElem.  The
    generated code raises UndefinedInstruction when imm does not index
    a valid element.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: "imm < 0 && imm >= eCount" can never
    # be true, which would let an out-of-range imm index past the end
    # of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
960
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate an instruction that combines every FloatReg of the
    first source with one FloatReg of the second source selected by
    `imm`.

    Sources and destination are FloatVect arrays of rCount entries.
    `op` is C++ text that sees srcReg1 (entry i of srcRegs1) and
    srcReg2 (entry imm of srcRegs2) and is expected to set destReg.
    The generated code raises UndefinedInstruction when imm fails the
    range check against eCount.

    readDest -- preload destReg from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use ||: "imm < 0 && imm >= eCount" can never
    # be true, which would let an out-of-range imm index past the end
    # of srcRegs2.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1012
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a source/destination/immediate instruction whose
    operands each span rCount registers.

    `op` is C++ text run once per loop iteration.  By default it sees
    srcElem1 and is expected to set destElem (both Element).

    readDest -- preload the destination variable from the current
                destination.
    toInt    -- declare the destination as FloatRegBits destReg and
                access destRegs.regs[i] instead of elements[i].
    fromInt  -- read the source as FloatRegBits srcReg1 from
                srcRegs1.regs[i] instead of elements[i].
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    walkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        walkCode += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            walkCode += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if toInt:
        declDest = 'FloatRegBits destReg;'
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    # NOTE(review): assumes the toInt form of the preload only applies
    # when readDest is set -- confirm against the original indentation.
    if not readDest:
        readDestCode = ''
    walkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }
    for regIdx in range(rCount):
        walkCode += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : regIdx }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1069
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a source/destination/immediate instruction with a wide
    source and a normal width destination.

    The source is a BigRegVect of four registers holding BigElement
    values; the destination is a RegVect of two registers.  `op` is C++
    text that sees srcElem1 and is expected to set destElem.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    srcRegCount, destRegCount = 4, 2
    walkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for regIdx in range(srcRegCount):
        walkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : regIdx }
    loadDest = ''
    if readDest:
        for regIdx in range(destRegCount):
            walkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : regIdx }
        loadDest = 'destElem = gtoh(destReg.elements[i]);'
    walkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for regIdx in range(destRegCount):
        walkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1113
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Generate a source/destination/immediate instruction with a
    normal width source and a wide destination.

    The source is a RegVect of two registers; the destination is a
    BigRegVect of four registers holding BigElement values.  `op` is
    C++ text that sees srcElem1 and is expected to set destElem.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The preload targets the per-element variable destElem; destReg
        # is the whole BigRegVect and cannot be assigned an element.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1157
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a one-source instruction whose source and destination
    each span rCount registers of the same element type.

    `op` is C++ text that sees srcElem1 (element i of srcReg1) and is
    expected to set destElem.  The result is stored to element j of
    destReg, where j starts out equal to i and is visible to `op`.

    readDest -- preload destReg/destElem from the current destination.
    types    -- element types to emit explicit instantiations for.
    """
    global header_output, exec_output
    walkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        walkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            walkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst
    loadDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDest }
    for regIdx in range(rCount):
        walkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": walkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1200
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register instruction whose source is one scalar lane.

    Same shape as the plain two-register generator, except that every
    iteration reads the source element selected by `imm`
    (srcReg1.elements[imm]) instead of element i, and the instruction is
    built on RegRegImmOp / NeonRegRegImmOpDeclare.  `op` must set
    destElem from srcElem1; with readDest the old destination element is
    available in destElem on entry.
    """
    global header_output, exec_output

    # Prologue: declare the vectors and pull in the architectural state.
    body = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for regIdx in range(rCount):
        regArgs = { "reg" : regIdx }
        body += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regArgs
        if readDest:
            body += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % regArgs

    # Element walk: the scalar lane is re-read on every iteration.
    if readDest:
        preloadDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        preloadDest = ''
    body += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : preloadDest }

    # Epilogue: commit the destination registers.
    for regIdx in range(rCount):
        body += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx }

    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           { "code": body,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1242
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register instruction that rewrites both operands.

    Unlike the element-walking generators, `op` is spliced in verbatim
    (no per-element loop is generated) and operates directly on the
    srcReg1 and destReg vectors.  Both vectors are always loaded from
    FpOp1P<n>_uw / FpDestP<n>_uw beforehand and both are written back
    afterwards.

    readDest is accepted only for signature parity with the sibling
    generators: the destination is read unconditionally, so the flag has
    no effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    # The snippet handles whole vectors itself; no loop wrapper is added.
    eWalkCode += op
    for reg in range(rCount):
        # Both registers are outputs of this instruction form.
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1277
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit a NEON two-register floating point instruction.

    The generated C++ walks whole registers (index r, rCount iterations)
    rather than elements: `op` sees srcReg1 (a FloatReg) and must set
    destReg.  With toInt the destination is handled as raw bits
    (FloatRegBits held in a RegVect and written through FpDestP<n>_uw);
    otherwise it is a FloatReg array written through FpDestP<n>.  When
    readDest is true the destination registers are loaded first and
    destReg is seeded with the current value of register r.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        typedef FloatReg FloatVect[rCount];
        FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    # The per-register loop below is indexed by `r`, and the integer
    # destination lives in destRegs.regs, so the read-back snippet has to
    # mirror writeDest for each flavour.
    readDestCode = ''
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1343
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction that folds adjacent element pairs.

    The generated loop runs eCount / 2 times; iteration i hands `op` the
    source elements 2*i (srcElem1) and 2*i+1 (srcElem2) and stores the
    BigElement result destElem into destination element i.  Source and
    destination both span rCount registers.  With readDest the
    destination registers are loaded first and destElem starts out as
    the current destination element.
    """
    global header_output, exec_output

    regRange = range(rCount)

    # Prologue.
    parts = [simdEnabledCheckCode + '''
        RegVect srcRegs;
        BigRegVect destReg;
    ''']
    for n in regRange:
        parts.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : n })
        if readDest:
            parts.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : n })

    # Pairwise element walk.
    loopArgs = { "op" : op, "readDest" : '' }
    if readDest:
        loopArgs["readDest"] = 'destElem = gtoh(destReg.elements[i]);'
    parts.append('''
        for (unsigned i = 0; i < eCount / 2; i++) {
            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % loopArgs)

    # Epilogue.
    for n in regRange:
        parts.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : n })

    objParams = InstObjParams(name, Name,
                              "RegRegOp",
                              { "code": ''.join(parts),
                                "r_count": rCount,
                                "predicate_test": predicateTest,
                                "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(objParams)
    exec_output += NeonUnequalRegExecute.subst(objParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1387
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON two-register narrowing instruction.

    Four source registers are read into a BigRegVect and two destination
    registers are written from a RegVect.  For each of the eCount
    elements `op` receives srcElem1 as a BigElement and must produce
    destElem as an Element.  With readDest the two destination registers
    are loaded first and destElem is seeded from the destination.
    """
    global header_output, exec_output

    srcRegCount = 4
    destRegCount = 2

    code = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
    '''
    code += ''.join('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : k } for k in range(srcRegCount))
    if readDest:
        code += ''.join('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : k } for k in range(destRegCount))

    initDest = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    code += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : initDest }
    code += ''.join('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : k } for k in range(destRegCount))

    instParams = InstObjParams(name, Name,
                               "RegRegOp",
                               { "code": code,
                                 "r_count": 2,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(instParams)
    exec_output += NeonUnequalRegExecute.subst(instParams)
    for targs in types:
        exec_output += NeonExecDeclare.subst({ "targs" : targs,
                                               "class_name" : Name })
1431
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON one-register-plus-immediate instruction.

    There is no source register: for each of the eCount elements `op`
    must set destElem, and the result is written to FpDestP<n>_uw for
    rCount registers.  With readDest the destination registers are
    loaded first and destElem starts out as the current destination
    element.  The instruction is built on RegImmOp /
    NeonRegImmOpDeclare.
    """
    global header_output, exec_output

    generated = [simdEnabledCheckCode, '''
        RegVect destReg;
    ''']

    fetchOld = ''
    if readDest:
        fetchOld = 'destElem = gtoh(destReg.elements[i]);'
        for r in range(rCount):
            generated.append('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : r })

    generated.append('''
        for (unsigned i = 0; i < eCount; i++) {
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : fetchOld })

    for r in range(rCount):
        generated.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : r })

    params = InstObjParams(name, Name,
                           "RegImmOp",
                           { "code": ''.join(generated),
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1469
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON two-register lengthening instruction.

    Two source registers are read into a RegVect and four destination
    registers are written from a BigRegVect.  For each of the eCount
    elements `op` receives srcElem1 as an Element and must produce
    destElem as a BigElement.  With readDest the four destination
    registers are loaded first and destElem is seeded with the current
    destination element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1;
        BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element accumulator, not the whole vector: the
        # loop body works on destElem and stores it back afterwards.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1513
# Halving add.  Each operand has its low bit masked off and is divided by
# two before the halves are summed; carryBit restores the carry that the
# two discarded low bits would have produced.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
# Registered as VhaddD and VhaddQ (numeric argument 2 vs. 4).
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
# Rounding halving add.  Identical to the halving add except that an
# extra 1 is added to the low-bit sum before it is shifted down, so the
# result is rounded instead of truncated.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# Halving subtract.  The halves of the two operands are subtracted and
# "barrowBit" (sic, the borrow) is 1 exactly when the low-bit difference
# (srcElem1 & 1) - (srcElem2 & 1) is negative.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Element-wise bitwise operations.  All of them are registered over
# unsignedTypes only.

# AND.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

# Bit clear: AND with the complement of the second operand.
vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

# OR.
vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

# The register-to-register vmov reuses the OR snippet under its own
# mnemonic and op class.
threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

# OR with the complement of the second operand.
vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

# Exclusive OR.
veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# Bitwise select family.  Each snippet reads the old destination value,
# which is why the calls pass True as the trailing argument.

# Insert if false: take srcElem1 bits where srcElem2 is 0, keep the
# destination bits where srcElem2 is 1.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# Insert if true: take srcElem1 bits where srcElem2 is 1, keep the
# destination bits where srcElem2 is 0.
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# Select: the old destination is the mask, choosing srcElem1 bits where
# it is 1 and srcElem2 bits where it is 0.
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# Element-wise maximum; srcElem2 is chosen when the operands are equal.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

# Element-wise minimum; srcElem2 is chosen when the operands are equal.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# Plain element-wise add.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

# The pairwise add reuses the add snippet with pairwise=True.
threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)
# Add with both operands cast to BigElement first; shared by the long
# (vaddl) and wide (vaddw) forms.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Add and keep the high half: the BigElement sum is shifted right by
# sizeof(Element) * 8 bits.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# Rounding variant: 1 << (sizeof(Element) * 8 - 1) is added before the
# shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# Plain element-wise subtract.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Subtract with both operands cast to BigElement first; shared by the
# long (vsubl) and wide (vsubw) forms.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1648
# Unsigned saturating add.  Wrap-around is detected by the sum being
# smaller than either operand; the result is then forced to all ones and
# the qc bit is set in the FPSCR copy written back to FpscrQc.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Subtract and keep the high half: the BigElement difference is shifted
# right by sizeof(Element) * 8 bits.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# Rounding variant: 1 << (sizeof(Element) * 8 - 1) is added before the
# shift.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# Signed saturating add.  Overflow is flagged when both operands share a
# sign and the sum's sign differs.  The result is then set to the value
# with only the top bit set, minus one when the wrapped sum came out
# negative, and qc is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1688
# Unsigned saturating subtract.  A difference larger than the minuend
# indicates a borrow; the result is then clamped to 0 and qc is set.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1700
# Signed saturating subtract.  Overflow is flagged when the operands have
# opposite signs (negSrc1 == posSrc2) and the difference's sign differs
# from srcElem1's.  The saturated value is built the same way as in the
# signed saturating add.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1717
# Element-wise compares.  Each produces an all-ones element when the
# condition holds and 0 otherwise.

# Greater than.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

# Greater than or equal.
vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

# Equal; registered over unsignedTypes only.
vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1735
# Shift by a per-element register amount.  The amount is the low byte of
# srcElem2 interpreted as signed: negative amounts shift right, others
# shift left.  Amounts of at least the element width yield 0 before the
# sign fix-up; for right shifts the amount is clamped to width - 1 so the
# fix-up mask is well defined.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1761
# Rounding shift by a per-element register amount.  Right shifts add
# rBit, the last bit shifted out (bit shiftAmt - 1 of the source), to the
# shifted value.  When the amount exceeds the element width and the
# source is negative, rBit is forced to 1, which cancels the all-ones
# sign fill so the result is 0.  A zero amount copies the source.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1795
# Unsigned saturating shift by a per-element register amount.  Right
# shifts never saturate.  A left shift saturates to all ones and sets qc
# when any of the bits that would be shifted out is set, or when the
# amount is at least the element width and the source is non-zero.  A
# zero amount copies the source.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1831
# Signed saturating shift by a per-element register amount.  Right shifts
# are sign-extended and never saturate.  A left shift saturates when the
# top shiftAmt + 1 bits of the source are not all copies of the sign bit,
# or when the amount is at least the element width and the source is
# non-zero.  The saturated value is mask(width - 1), complemented for a
# negative source.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1877
# Unsigned saturating rounding shift by a per-element register amount.
# Right shifts add rBit, the last bit shifted out, to the shifted value.
# A left shift saturates to all ones and sets qc when any of the bits
# that would be shifted out is set, or when the amount is at least the
# element width and the source is non-zero.
#
# A zero amount is handled separately, as in the non-rounding unsigned
# saturating shift: otherwise the overflow test would ask bits() for the
# inverted range [width - 1 : width], which shifts the source by its full
# width.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# Signed saturating rounding shift by a per-element register amount.
# The right-shift path matches the rounding shift (sign-extended shift
# plus rBit, with rBit forced to 1 for a negative source and an amount
# above the element width).  The left-shift path matches the signed
# saturating shift.  A zero amount copies the source.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1967
# Absolute difference and accumulate: |srcElem1 - srcElem2| is added to
# the old destination element (hence the trailing True argument).
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# Long form: the difference is computed on BigElement-cast operands.
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1980
# Absolute difference: the larger operand minus the smaller one.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# Long form: the difference is computed on BigElement-cast operands.
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1993
# Test bits: all ones when the operands share at least one set bit,
# otherwise 0.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1999
# Element-wise multiply.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# Long multiply: both operands are cast to BigElement before multiplying.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# Multiply-accumulate: the product is added to the old destination
# element (hence the trailing True argument).
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# Long form: the product is formed from BigElement-cast operands.
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2019
# Saturating doubling multiply-accumulate long.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2 as a BigElement.  The doubled
# product only leaves the BigElement range when both operands are the
# most negative Element value (giving +2^(2n-1) for n-bit elements), so
# that is the only case that is saturated to the largest positive
# BigElement.  This is the same condition the non-accumulating
# saturating doubling multiply long uses.
# Step 2: midElem is added to the old destination; signed overflow of the
# accumulation (operands share a sign, result does not) saturates toward
# the sign of the old destination.
# Either saturation sets qc.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2044
# Saturating doubling multiply-subtract long.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2 as a BigElement.  The doubled
# product only leaves the BigElement range when both operands are the
# most negative Element value, so that is the only case that is
# saturated to the largest positive BigElement.  This is the same
# condition the non-accumulating saturating doubling multiply long uses.
# Step 2: midElem is subtracted from the old destination.  posMid is the
# sign of -midElem, i.e. of the value effectively being added; overflow
# (old destination and -midElem share a sign, result does not) saturates
# toward the sign of the old destination.
# Either saturation sets qc.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2069
# Saturating doubling multiply long.  When both operands equal the value
# with only the top Element bit set, the result is replaced by
# ~(srcElem1 << element width) and qc is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2082
# Multiply-subtract: the product is subtracted from the old destination
# element (hence the trailing True argument).
vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# Long form: the product is formed from BigElement-cast operands.
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2092
# Carry-less (XOR-accumulated) multiply: for every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into the result.
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form: srcElem1 is widened to BigElement before shifting, so no
# product bits are lost.
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
# Pairwise maximum and minimum reuse the vmax / vmin snippets with
# pairwise=True; only a D-named variant is registered for each.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2114
# vqdmulh: upper half of the doubled product.  When both sources equal
# 1 << (element bits - 1) the result is ~srcElem1 and FPSCR.QC is set.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    // Value with only the top bit of the element set.
    const Element topBitOnly = (Element)((Element)1 <<
                                   (sizeof(Element) * 8 - 1));
    if (srcElem1 == topBitOnly && srcElem2 == topBitOnly) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp",
                  smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp",
                  smallSignedTypes, 4, vqdmulhCode)
2129
# vqrdmulh: like vqdmulh, but half of the discarded range is added to
# the doubled product before taking the upper half (rounding).  Three
# specific operand pairs are treated as saturating and set FPSCR.QC.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element mostNegative = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfMostNegative = mostNegative / 2;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    const bool saturates =
        (srcElem2 == mostNegative &&
         (srcElem1 == mostNegative || srcElem1 == halfMostNegative)) ||
        (srcElem1 == mostNegative && srcElem2 == halfMostNegative);
    if (saturates) {
        // The sign of the wrapped result selects the saturation value.
        if (destElem >= 0) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD", "SimdMultOp",
                  smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ", "SimdMultOp",
                  smallSignedTypes, 4, vqrdmulhCode)
2153
# Single-precision vmax.  processNans() runs first and reports through
# its second argument whether it already produced the result; fpMaxS is
# evaluated only when it did not.
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool nanHandled;
    destReg = processNans(fpscr, nanHandled, true, srcReg1, srcReg2);
    if (nanHandled) {
        // Result already chosen; only record a flushed input.
        if (flushToZero(srcReg1, srcReg2))
            fpscr.idc = 1;
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp",
                    ("float",), 2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp",
                    ("float",), 4, vmaxfpCode)
2168
# Single-precision vmin.  processNans() runs first and reports through
# its second argument whether it already produced the result; fpMinS is
# evaluated only when it did not.
vminfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool nanHandled;
    destReg = processNans(fpscr, nanHandled, true, srcReg1, srcReg2);
    if (nanHandled) {
        // Result already chosen; only record a flushed input.
        if (flushToZero(srcReg1, srcReg2))
            fpscr.idc = 1;
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp",
                    ("float",), 2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp",
                    ("float",), 4, vminfpCode)
2183
# Pairwise floating-point max/min reuse the vmaxfpCode / vminfpCode
# snippets with pairwise=True.
threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp",
                    ("float",), 2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp",
                    ("float",), 4, vmaxfpCode, pairwise=True)

threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp",
                    ("float",), 2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp",
                    ("float",), 4, vminfpCode, pairwise=True)
2193
# Single-precision vadd: one fpAddS evaluated through binaryOp() with
# VfpRoundNearest; FPSCR exception state is read before and written
# back after.  The same snippet also backs pairwise vpadd.
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, true, true,
                       VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp",
                    ("float",), 2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp",
                    ("float",), 4, vaddfpCode)

threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp",
                    ("float",), 2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp",
                    ("float",), 4, vaddfpCode, pairwise=True)
2207
# Single-precision vsub: srcReg1 - srcReg2 via fpSubS.
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, true, true,
                       VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp",
                    ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp",
                    ("float",), 4, vsubfpCode)

# Single-precision vmul: srcReg1 * srcReg2 via fpMulS.
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true,
                       VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp",
                    ("float",), 2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp",
                    ("float",), 4, vmulfpCode)
2225
# Single-precision vmla: the product goes through its own binaryOp()
# call before being added to the destination, so the multiply and the
# add are two separate operations.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, product, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vmlafpCode, True)

# Single-precision vmls: destination minus the product, again as two
# separate binaryOp() calls.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, product, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vmlsfpCode, True)
2247
# Floating-point vcgt.  The comparison helper is run through binaryOp()
# and returns a float code: 0 yields -1 in the destination, anything
# else yields 0, and a code of 2.0 additionally sets FPSCR.IOC.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmpCode = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                             true, true, VfpRoundNearest);
    if (cmpCode == 0)
        destReg = -1;
    else
        destReg = 0;
    if (cmpCode == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp",
                    ("float",), 2, vcgtfpCode, toInt = True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp",
                    ("float",), 4, vcgtfpCode, toInt = True)

# Floating-point vcge: same result encoding, using vcgeFunc.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmpCode = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                             true, true, VfpRoundNearest);
    if (cmpCode == 0)
        destReg = -1;
    else
        destReg = 0;
    if (cmpCode == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp",
                    ("float",), 2, vcgefpCode, toInt = True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp",
                    ("float",), 4, vcgefpCode, toInt = True)
2275
# Floating-point vacgt.  vacgtFunc is run through binaryOp() and
# returns a float code: 0 yields -1 in the destination, anything else
# yields 0, and a code of 2.0 additionally sets FPSCR.IOC.
vacgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmpCode = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                             true, true, VfpRoundNearest);
    if (cmpCode == 0)
        destReg = -1;
    else
        destReg = 0;
    if (cmpCode == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp",
                    ("float",), 2, vacgtfpCode, toInt = True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp",
                    ("float",), 4, vacgtfpCode, toInt = True)

# Floating-point vacge: same result encoding, using vacgeFunc.
vacgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmpCode = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                             true, true, VfpRoundNearest);
    if (cmpCode == 0)
        destReg = -1;
    else
        destReg = 0;
    if (cmpCode == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp",
                    ("float",), 2, vacgefpCode, toInt = True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp",
                    ("float",), 4, vacgefpCode, toInt = True)
2303
# Floating-point vceq.  vceqFunc is run through binaryOp() and returns
# a float code: 0 yields -1 in the destination, anything else yields 0,
# and a code of 2.0 additionally sets FPSCR.IOC.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float cmpCode = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                             true, true, VfpRoundNearest);
    if (cmpCode == 0)
        destReg = -1;
    else
        destReg = 0;
    if (cmpCode == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp",
                    ("float",), 2, vceqfpCode, toInt = True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp",
                    ("float",), 4, vceqfpCode, toInt = True)
2317
# vrecps: the whole step is delegated to fpRecpsS via binaryOp().
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, true, true,
                       VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp",
                    ("float",), 2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp",
                    ("float",), 4, vrecpsCode)

# vrsqrts: the whole step is delegated to fpRSqrtsS via binaryOp().
vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, true, true,
                       VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp",
                    ("float",), 2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp",
                    ("float",), 4, vrsqrtsCode)
2335
# Floating-point vabd: subtract through binaryOp() (so FPSCR flags are
# tracked), then take the magnitude of the difference with fabs().
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float difference = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                                true, true, VfpRoundNearest);
    destReg = fabs(difference);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp",
                    ("float",), 2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp",
                    ("float",), 4, vabdfpCode)
2345
# Two-register generator variants of vmla / vmls / vmul.  They reuse
# the code snippets of the three-register forms; vmlaCode, vmlalCode,
# vmulCode and vmullCode are defined outside this section.
twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp",
                unsignedTypes, 2, vmlaCode, True)
twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp",
                unsignedTypes, 4, vmlaCode, True)
twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp",
                  ("float",), 2, vmlafpCode, True)
twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp",
                  ("float",), 4, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp",
               smallTypes, vmlalCode, True)

twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp",
                allTypes, 2, vmlsCode, True)
twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp",
                allTypes, 4, vmlsCode, True)
twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp",
                  ("float",), 2, vmlsfpCode, True)
twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp",
                  ("float",), 4, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp",
               smallTypes, vmlslCode, True)

twoEqualRegInst("vmul", "VmulsD", "SimdMultOp",
                allTypes, 2, vmulCode)
twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp",
                allTypes, 4, vmulCode)
twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp",
                  ("float",), 2, vmulfpCode)
twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp",
                  ("float",), 4, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp",
               smallTypes, vmullCode)
2363
# Two-register generator variants of the saturating doubling
# multiplies, reusing the snippets of the three-register forms
# (vqdmlalCode is defined outside this section).
twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp",
               smallTypes, vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp",
               smallTypes, vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp",
               smallTypes, vqdmlslCode, True)
twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp",
                smallSignedTypes, 2, vqdmulhCode)
twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp",
                smallSignedTypes, 4, vqdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsD", "SimdMultOp",
                smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ", "SimdMultOp",
                smallSignedTypes, 4, vqrdmulhCode)
2373
# vshr: right shift by immediate.  A shift count of at least the
# element width is not handed to the C++ >> operator; the result is
# then -1 or 0 depending on ltz(srcElem1).
vshrCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = ltz(srcElem1) ? -1 : 0;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
# vsra: shift right by immediate and add the result to the destination.
# A shift count of at least the element width yields -1 or 0 depending
# on ltz(srcElem1).  Otherwise, if the source tests as negative but the
# shifted value does not, the bits above the shifted-down top bit are
# filled in by hand.
# NOTE(review): ltz() is presumably a "less than zero" test that also
# covers the unsigned instantiations -- confirm against its definition.
vsraCode = '''
    Element mid;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            // Manual sign extension from the shifted-down top bit.
            mid |= -(mid & ((Element)1 <<
                            (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp",
                allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp",
                allTypes, 4, vsraCode, True)
2402
# vrshr: rounding right shift.  The last bit shifted out is added back
# to the shifted value.  The shift is split into (imm - 1) and 1 so the
# C++ shift count stays below the element width when imm equals it.
vrshrCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp",
                allTypes, 2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp",
                allTypes, 4, vrshrCode)
2415
# vrsra: rounding right shift, accumulated into the destination.
# Shift counts larger than the element width add nothing.
vrsraCode = '''
    if (imm == 0) {
        destElem += srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp",
                allTypes, 2, vrsraCode, True)
twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp",
                allTypes, 4, vrsraCode, True)
2428
# vsri: shift right and insert.  The shifted source replaces the low
# (width - imm) bits of the destination; the top imm destination bits
# are kept.  A shift of the full width or more leaves it untouched.
vsriCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 >> imm) |
                   (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp",
                unsignedTypes, 2, vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp",
                unsignedTypes, 4, vsriCode, True)
2438
# vshl: left shift by immediate.  A shift of the full element width or
# more is performed as (width - 1) followed by 1, keeping each C++
# shift count below the width.
vshlCode = '''
    if (imm < sizeof(Element) * 8)
        destElem = srcElem1 << imm;
    else
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp",
                unsignedTypes, 2, vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp",
                unsignedTypes, 4, vshlCode)
2447
# vsli: shift left and insert.  The shifted source replaces everything
# except the low imm bits of the destination.  A shift of the full
# width or more leaves the destination untouched.
vsliCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp",
                unsignedTypes, 2, vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp",
                unsignedTypes, 4, vsliCode, True)
2456
# vqshl on signed elements: saturating left shift by immediate.  The
# shift saturates (and sets FPSCR.QC) unless the bits from the sign bit
# down to the last bit shifted out are all zero or all one.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 == 0) {
            destElem = 0;
        } else {
            // Every significant bit is shifted out: saturate by sign.
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = (srcElem1 << imm);
        uint64_t signRun = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (signRun != 0 && signRun != mask(imm + 1)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp",
                signedTypes, 2, vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp",
                signedTypes, 4, vqshlCode)
2486
# vqshlu on unsigned elements: saturating left shift by immediate.  If
# any of the top imm bits of the source is set the result becomes all
# ones and FPSCR.QC is set.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 == 0) {
            destElem = 0;
        } else {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = (srcElem1 << imm);
        uint64_t shiftedOut = bits((uint64_t)srcElem1,
                                   sizeof(Element) * 8 - 1,
                                   sizeof(Element) * 8 - imm);
        if (shiftedOut != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp",
                unsignedTypes, 2, vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp",
                unsignedTypes, 4, vqshluCode)
2512
# vqshlus: signed source, unsigned-style saturation.  Negative inputs
# always give 0 with FPSCR.QC set; non-negative inputs whose top imm
# bits are not all zero give all ones with QC set.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 == 0) {
            destElem = 0;
        } else if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = (srcElem1 << imm);
        uint64_t shiftedOut = bits((uint64_t)srcElem1,
                                   sizeof(Element) * 8 - 1,
                                   sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (shiftedOut != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp",
                signedTypes, 2, vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp",
                signedTypes, 4, vqshlusCode)
2549
# vshrn: right shift of the source element.  A shift count of at least
# the source width gives 0 instead of being passed to the C++ >>
# operator.
vshrnCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, vshrnCode)
2558
# vrshrn: rounding right shift of the source element.  The last bit
# shifted out is added back; the shift is split into (imm - 1) and 1
# so the C++ shift count stays below the source width.
vrshrnCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element roundBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + roundBit;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, vrshrnCode)
2570
# vqshrn on signed elements: shift right, sign-extend the shifted
# value by hand, then saturate to the signed range of Element (setting
# FPSCR.QC) when it does not survive a round trip through Element.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1))
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        // Manual sign extension from the shifted-down top bit.
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp",
                      smallSignedTypes, vqshrnCode)
2595
# "vqshrun" snippet for unsigned elements: shift right, then saturate
# to all ones (setting FPSCR.QC) when the shifted value does not
# survive a round trip through Element.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun", "SimdShiftOp",
                      smallUnsignedTypes, vqshrunCode)
2617
# "vqshrun" snippet for signed elements: shift right, then saturate
# when any bit above the Element width remains set.  Negative sources
# saturate to 0, others to all ones; FPSCR.QC is set in both cases.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement shifted = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(shifted, sizeof(BigElement) * 8 - 1,
                 sizeof(Element) * 8) == 0) {
            destElem = shifted;
        } else {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns", "SimdShiftOp",
                      smallSignedTypes, vqshrunsCode)
2644
# vqrshrn on signed elements: rounding right shift with manual sign
# extension, followed by saturation to the signed range of Element.
# FPSCR.QC is set whenever the value had to be clamped.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        if (srcElem1 == (Element)srcElem1) {
            destElem = srcElem1;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1))
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement shifted = (srcElem1 >> (imm - 1));
        // The last bit shifted out is added back after the shift.
        uint64_t roundBit = shifted & 0x1;
        shifted >>= 1;
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        shifted += roundBit;
        if (shifted == (Element)shifted) {
            destElem = shifted;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", "SimdShiftOp",
                      smallSignedTypes, vqrshrnCode)
2680
# "vqrshrun" snippet for unsigned elements: rounding right shift, then
# saturation to the unsigned range of Element.  A value that does not
# survive a round trip through Element is replaced by all ones and
# FPSCR.QC is set.  The zero-shift path saturates to the same all-ones
# value as the shifted path.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        // The last bit shifted out is added back after the shift.
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation: every bit of the element is set.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", "SimdShiftOp",
                      smallUnsignedTypes, vqrshrunCode)
2710
# "vqrshrun" snippet for signed elements: rounding right shift with
# manual sign extension, then saturation when any bit above the
# Element width remains set.  Negative sources saturate to 0, others
# to all ones; FPSCR.QC is set in both cases.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement shifted = (srcElem1 >> (imm - 1));
        // The last bit shifted out is added back after the shift.
        uint64_t roundBit = shifted & 0x1;
        shifted >>= 1;
        shifted |= -(shifted & ((BigElement)1 <<
                                (sizeof(BigElement) * 8 - 1 - imm)));
        shifted += roundBit;
        if (bits(shifted, sizeof(BigElement) * 8 - 1,
                 sizeof(Element) * 8) == 0) {
            destElem = shifted;
        } else {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", "SimdShiftOp",
                      smallSignedTypes, vqrshrunsCode)
2747
# vshll: widen the source to BigElement, then shift left.  A shift
# count of at least the destination width gives 0.
vshllCode = '''
    if (imm < sizeof(destElem) * 8) {
        destElem = (BigElement)srcElem1 << imm;
    } else {
        destElem = 0;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp",
                    smallTypes, vshllCode)

# vmovl: plain copy of the source element into the destination element.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp",
                    smallTypes, vmovlCode)
2761
# vcvt, float to fixed point.  The two snippets differ only in the
# second argument to vfpFpSToFixed (false in the "u" snippet, true in
# the "s" snippet).
# NOTE(review): the empty asm statements with memory constraints are
# presumably there to stop the compiler from moving the conversion
# across prepFpState()/finishVfp() -- confirm before touching them.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState savedFpState = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, false, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, savedFpState, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp",
                ("float",), 2, vcvt2ufxCode, toInt = True)
twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp",
                ("float",), 4, vcvt2ufxCode, toInt = True)

vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState savedFpState = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, true, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, savedFpState, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp",
                ("float",), 2, vcvt2sfxCode, toInt = True)
twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp",
                ("float",), 4, vcvt2sfxCode, toInt = True)
2793
# vcvt, fixed point to float.  The two snippets differ only in the
# helper they call: vfpUFixedToFpS in the "u" snippet and
# vfpSFixedToFpS in the "s" snippet.
# NOTE(review): the empty asm statements with memory constraints are
# presumably there to stop the compiler from moving the conversion
# across prepFpState()/finishVfp() -- confirm before touching them.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState savedFpState = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, savedFpState, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp",
                ("float",), 2, vcvtu2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp",
                ("float",), 4, vcvtu2fpCode, fromInt = True)

vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState savedFpState = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, savedFpState, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp",
                ("float",), 2, vcvts2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp",
                ("float",), 4, vcvts2fpCode, fromInt = True)
2821
# vcvt, single precision to half precision.  The source element holds
# the raw bits of a float; it is reinterpreted with bitsToFp(), checked
# for flush-to-zero, and converted with vcvtFpSFpH().  destElem is
# zeroed first because it is named as an input of the asm statement
# below before it receives the converted value.
# NOTE(review): the empty asm statements with memory constraints are
# presumably there to stop the compiler from moving the conversion
# across prepFpState()/finishVfp() -- confirm before touching them.
vcvts2hCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp",
                     ("uint16_t",), vcvts2hCode)
2838
# vcvt, half precision to single precision.  vcvtFpHFpS() produces a
# float whose raw bits are stored in the destination element through
# fpToBits().  destElem is zeroed first because it is named as an input
# of the asm statement below before it receives the converted value.
# NOTE(review): the empty asm statements with memory constraints are
# presumably there to stop the compiler from moving the conversion
# across prepFpState()/finishVfp() -- confirm before touching them.
vcvth2sCode = '''
    destElem = 0;
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState savedFpState = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, savedFpState, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp",
                   ("uint16_t",), vcvth2sCode)
2849
# vrsqrte on 32-bit unsigned elements: delegated entirely to
# unsignedRSqrtEstimate().
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp",
               ("uint32_t",), 2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp",
               ("uint32_t",), 4, vrsqrteCode)

# vrsqrte on floats: FPSCR.IDC is set when flushToZero() reports true
# for the input, then fprSqrtEstimate() produces the result.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1)) {
        fpscr.idc = 1;
    }
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp",
                 ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp",
                 ("float",), 4, vrsqrtefpCode)
2865
# vrecpe on 32-bit unsigned elements: delegated entirely to
# unsignedRecipEstimate().
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp",
               ("uint32_t",), 2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp",
               ("uint32_t",), 4, vrecpeCode)

# vrecpe on floats: FPSCR.IDC is set when flushToZero() reports true
# for the input, then fpRecipEstimate() produces the result.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1)) {
        fpscr.idc = 1;
    }
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp",
                 ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp",
                 ("float",), 4, vrecpefpCode)
2881
# vrev16 / vrev32 / vrev64: the element is copied unchanged and the
# index j is set to i XOR (elements per group - 1).  The group is 2, 4
# or 8 bytes wide respectively.
# NOTE(review): i and j are provided by the surrounding template;
# presumably i is the source index and j the destination index --
# confirm against twoRegMiscInst.
vrev16Code = '''
    destElem = srcElem1;
    const unsigned elemsPerGroup = ((1 << 1) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp",
               ("uint8_t",), 2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp",
               ("uint8_t",), 4, vrev16Code)
vrev32Code = '''
    destElem = srcElem1;
    const unsigned elemsPerGroup = ((1 << 2) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev32", "NVrev32D", "SimdAluOp",
               ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q", "SimdAluOp",
               ("uint8_t", "uint16_t"), 4, vrev32Code)
vrev64Code = '''
    destElem = srcElem1;
    const unsigned elemsPerGroup = ((1 << 3) / sizeof(Element));
    j = i ^ (elemsPerGroup - 1);
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp",
               smallUnsignedTypes, 2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp",
               smallUnsignedTypes, 4, vrev64Code)
2908
# vpaddl: sum of two source elements, each widened to BigElement
# before the addition.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp",
                   smallTypes, 2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp",
                   smallTypes, 4, vpaddlCode)

# vpadal: the same widened sum, accumulated into the destination.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp",
                   smallTypes, 2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp",
                   smallTypes, 4, vpadalCode, True)
2920
# vcls: counts how many bits directly below the top bit repeat the
# sign of the original value.  The source is shifted left one bit at a
# time and its sign re-tested; the count is capped at width - 1.
vclsCode = '''
    const bool startedNegative = (srcElem1 < 0);
    unsigned count = 0;
    srcElem1 <<= 1;
    while ((srcElem1 < 0) == startedNegative &&
           count < sizeof(Element) * 8 - 1) {
        ++count;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2940
# vclz: counts leading zero bits.  The element is signed, so "top bit
# clear" is tested as srcElem1 >= 0; the value is shifted left until
# the top bit is set or every bit has been examined.
vclzCode = '''
    unsigned count = 0;
    for (; srcElem1 >= 0 && count < sizeof(Element) * 8;
           srcElem1 <<= 1) {
        count++;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2951
# vcnt: counts set bits by summing the low bit while shifting the
# source right, stopping early once no set bits remain.
vcntCode = '''
    unsigned count = 0;
    for (; srcElem1 && count < sizeof(Element) * 8; srcElem1 >>= 1) {
        count += srcElem1 & 0x1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp",
               unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp",
               unsignedTypes, 4, vcntCode)
2963
# vmvn: bitwise complement, instantiated for 64-bit elements only.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp",
               ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp",
               ("uint64_t",), 4, vmvnCode)
2969
# vqabs: absolute value with saturation.  The one input equal to
# 1 << (element bits - 1) is replaced by its complement and sets
# FPSCR.QC.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const bool isTopBitOnly =
        (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1)));
    if (isTopBitOnly) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = (srcElem1 < 0) ? -srcElem1 : srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp",
               signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp",
               signedTypes, 4, vqabsCode)

# vqneg: negation with the same saturating special case.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    const bool isTopBitOnly =
        (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1)));
    if (!isTopBitOnly) {
        destElem = -srcElem1;
    } else {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp",
               signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp",
               signedTypes, 4, vqnegCode)
2997
# vabs on signed integers: plain negate-if-negative, no saturation.
vabsCode = '''
    destElem = (srcElem1 < 0) ? -srcElem1 : srcElem1;
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)

# vabs on floats: the top bit of the 32-bit pattern is cleared through
# a union; no floating-point helper is called and FPSCR is not touched.
vabsfpCode = '''
    union
    {
        uint32_t raw;
        float value;
    } bitView;
    bitView.value = srcReg1;
    bitView.raw = bitView.raw & mask(sizeof(Element) * 8 - 1);
    destReg = bitView.value;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp",
                 ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp",
                 ("float",), 4, vabsfpCode)
3020
3021 vnegCode = '''
3022 destElem = -srcElem1;
3023 '''
3024 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3025 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3026 vnegfpCode = '''
3027 destReg = -srcReg1;
3028 '''
3029 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3030 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3031
# Compare-against-zero family (vcgt, vcge, vceq, vcle, vclt).
#
# Each integer form writes mask(sizeof(Element) * 8) to destElem when the
# comparison of srcElem1 with 0 holds, and 0 otherwise.
#
# Each floating-point form runs the matching vc??Func through binaryOp
# against (FloatReg)0.0, stores -1 when the result is 0 and 0 otherwise,
# sets fpscr.ioc when the result is 2.0, and writes fpscr back to FpscrExc.
# NOTE(review): the (res == 0) ? -1 : 0 mapping relies on the return
# convention of the vc??Func helpers, which are not defined in this
# section -- confirm against their definitions.

# vcgt: srcElem1 > 0
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgtfpCode, toInt = True)

# vcge: srcElem1 >= 0
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgefpCode, toInt = True)

# vceq: srcElem1 == 0
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
                 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
                 4, vceqfpCode, toInt = True)

# vcle: srcElem1 <= 0
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
                 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
                 4, vclefpCode, toInt = True)

# vclt: srcElem1 < 0
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
                 2, vcltfpCode, toInt = True)
twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
                 4, vcltfpCode, toInt = True)
3116
# vswp: exchange srcReg1 and destReg one register word at a time, using
# mid as the temporary.  Both operands are modified.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)

# vtrn: for every even index i, exchange srcReg1.elements[i] with
# destReg.elements[i + 1].  The other elements of each pair are untouched.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
                   smallUnsignedTypes, 2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
                   smallUnsignedTypes, 4, vtrnCode)
3140
# vuzp: de-interleave destReg and srcReg1 in place.  mid holds a copy of
# the original srcReg1.  After the code runs:
#   destReg = even-indexed elements of destReg, then even-indexed of mid
#   srcReg1 = odd-indexed elements of destReg, then odd-indexed of mid
# The first loop reads destReg.elements[2 * i] and [2 * i + 1] before
# writing destReg.elements[i], so no unread slot is overwritten.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3155
# vzip: interleave destReg and srcReg1 in place.  mid holds a copy of the
# original destReg.  After the code runs:
#   destReg = lower halves of mid and srcReg1, interleaved
#   srcReg1 = upper halves of mid and srcReg1, interleaved
# The second loop rewrites srcReg1 in place; iteration i writes indices
# 2 * i and 2 * i + 1 but reads index eCount / 2 + i, which is always
# beyond anything written so far, so no unread slot is overwritten.
# Both loops use an unsigned index, like every other element loop in this
# file, so the bound check against eCount / 2 does not mix signedness.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3170
# vmovn: plain element assignment, instantiated through the narrowing
# generator, so any narrowing comes from the element types the generator
# supplies rather than from this snippet.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)

# vdup (scalar source): plain element assignment; selecting the source
# scalar is left to twoRegMiscScInst.
vdupCode = 'destElem = srcElem1;'
twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3177
# Generate a vdup instruction class that copies the Op1 operand into every
# element of the destination vector.
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name; also used for the explicit instantiations
#   opClass - value substituted as "op_class"
#   types   - element types to emit NeonExecDeclare instantiations for
#   rCount  - number of destination register words to write back
# Appends to the global header_output and exec_output.
def vdupGprInst(name, Name, opClass, types, rCount):
    global header_output, exec_output
    # Fill a local vector with Op1 cast to the element type.
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register word.
    writeBackCode = ''.join('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx } for regIdx in range(rCount))
    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": fillCode + writeBackCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    # Explicitly instantiate the execute method for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# vdup from the Op1 operand: D and Q classes (rCount 2 and 4).
vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)

# Immediate forms.  All are instantiated for uint64_t only.
# vmov: destination element is the immediate.
vmovCode = 'destElem = imm;'
oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)

# vorr: OR the immediate into the existing destination element.
# NOTE(review): the trailing True is presumably what makes oneRegImmInst
# load the old destination value (the code uses |=) -- confirm there.
vorrCode = 'destElem |= imm;'
oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)

# vmvn: destination element is the complement of the immediate.  This
# rebinds vmvnCode, which earlier held the register-form snippet.
vmvnCode = 'destElem = ~imm;'
oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

# vbic: clear the immediate's bits in the existing destination element.
# Passes the same trailing True as vorr (the code uses &=).
vbicCode = 'destElem &= ~imm;'
oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3220
# vqmovn: saturating narrow.  srcElem1 is assigned to destElem; if
# converting destElem back to BigElement does not reproduce srcElem1, the
# value did not fit: fpscr.qc is set and destElem becomes
# mask(sizeof(Element) * 8 - 1), complemented when srcElem1 is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)

# vqmovun over the unsigned types: same round-trip test, but an overflow
# yields mask(sizeof(Element) * 8) with no negative case.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun",
                     "SimdMiscOp", smallUnsignedTypes, vqmovunCode)

# vqmovun over the signed types: a negative source, or one whose low
# sizeof(Element) * 8 bits do not reproduce it, sets fpscr.qc.  The result
# is then mask(sizeof(Element) * 8), complemented when srcElem1 is
# negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
                     "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3260
# Generate a three-register-plus-immediate NEON instruction class (used
# for vext).
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name; also used for the explicit instantiations
#   opClass - value substituted as "op_class"
#   types   - element types to emit NeonExecDeclare instantiations for
#   rCount  - number of register words per operand to load and write back
#   op      - C++ snippet that computes destReg from srcReg1, srcReg2, imm
# Appends to the global header_output and exec_output.
def buildVext(name, Name, opClass, types, rCount, op):
    global header_output, exec_output
    # Declare the working vectors, then emit the SIMD-enabled check exactly
    # once.  It used to be appended inside the register loop below, which
    # repeated the same check rCount times in the generated code.
    # NOTE(review): assumes simdEnabledCheckCode (defined elsewhere) is a
    # side-effect-free guard, so one copy is equivalent to several.
    eWalkCode = '''
    RegVect srcReg1, srcReg2, destReg;
    ''' + simdEnabledCheckCode
    # Load both source operands one register word at a time.
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Write the result back one register word at a time.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Explicitly instantiate the execute method for each element type
    # (elemType avoids shadowing the builtin "type").
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
3288
# vext: destReg.elements[i] comes from position i + imm of the sequence
# formed by srcReg1 followed by srcReg2.  If the index is still >= eCount
# after subtracting eCount, it lies past both sources and fault is set to
# an UndefinedInstruction (constructed with (false, mnemonic) when not in
# FullSystem mode).
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                if (FullSystem)
                    fault = new UndefinedInstruction;
                else
                    fault = new UndefinedInstruction(false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3309
# Generate a table-lookup instruction class (vtbl or vtbx).
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name
#   opClass - value substituted as "op_class"
#   length  - substituted with %d as the C++ constant "length"; the first
#             2 * length table register words are loaded from FpOp1P<n>_uw
#   isVtbl  - text substituted with %s as a C++ bool initializer; when it
#             is true, out-of-range indices produce 0, otherwise the
#             destination byte is left unchanged
# Appends to the global header_output, decoder_output and exec_output.
def buildVtbxl(name, Name, opClass, length, isVtbl):
    global header_output, decoder_output, exec_output
    # Working storage plus loads of the index vector (Op2) and of the
    # current destination value.
    setupCode = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # All eight table words are assigned: the first 2 * length come from
    # the Op1 registers, the remainder are zeroed.
    tableCode = ''.join(
        ('table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
         if regIdx < length * 2 else
         'table.regs[%(reg)d] = 0;\n') % { "reg" : regIdx }
        for regIdx in range(8))
    # Per-byte lookup followed by the write-back of the two result words.
    lookupCode = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    params = InstObjParams(name, Name,
                           "RegRegRegOp",
                           { "code": setupCode + tableCode + lookupCode,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += RegRegRegOpDeclare.subst(params)
    decoder_output += RegRegRegOpConstructor.subst(params)
    exec_output += PredOpExecute.subst(params)
3363
# vtbl with table lengths 1-4.  isVtbl is given as the text "true" because
# buildVtbxl pastes it into the generated C++ with %(isVtbl)s.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

# vtbx with table lengths 1-4 ("false": out-of-range indices leave the
# destination byte unchanged).
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3373}};
2841 FPSCR fpscr = (FPSCR) FpscrExc;
2842 VfpSavedState state = prepFpState(VfpRoundNearest);
2843 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2844 : "m" (srcElem1), "m" (destElem));
2845 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2846 __asm__ __volatile__("" :: "m" (destElem));
2847 finishVfp(fpscr, state, true);
2848 FpscrExc = fpscr;
2849 '''
# Instantiate NVcvth2s from vcvth2sCode (defined just above) for uint16_t
# source elements.
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)

# vrsqrte (integer): delegates to unsignedRSqrtEstimate.
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)

# vrsqrte (float): sets fpscr.idc when flushToZero(srcReg1) reports true,
# then delegates to fprSqrtEstimate and writes fpscr back to FpscrExc.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)

# vrecpe (integer): delegates to unsignedRecipEstimate.
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)

# vrecpe (float): same flush-to-zero handling as the float vrsqrte, then
# delegates to fpRecipEstimate.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2883
# vrev16 / vrev32 / vrev64: the element value is copied unchanged; only
# its index changes.  groupSize is the group width in bytes (1 << 1,
# 1 << 2 or 1 << 3) divided by sizeof(Element), and j is i XOR-ed with
# groupSize - 1.
# NOTE(review): i and j come from the twoRegMiscInst template --
# presumably the source and destination element indices; confirm there.
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev32", "NVrev32D",
               "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q",
               "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2910
# vpaddl: destElem is the sum of srcElem1 and srcElem2, each converted to
# BigElement before the addition.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)

# vpadal: same sum, accumulated into the existing destElem.
# NOTE(review): the trailing True is presumably what makes
# twoRegCondenseInst load the old destination value (the code uses +=) --
# confirm there.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2922
# vcls: shift srcElem1 left once, then count further shifts for as long as
# the shifted value keeps the sign the input had (< 0 for a negative
# input, >= 0 otherwise).  The count is capped at sizeof(Element) * 8 - 1.
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)

# vclz: count left shifts while the signed value stays >= 0 (top bit
# clear), capped at sizeof(Element) * 8 so an all-zero input terminates.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2953
# vcnt: add the low bit of srcElem1 to count and shift right, until the
# value is zero or sizeof(Element) * 8 iterations have run.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)

# vmvn (register form): destElem is the bitwise complement of srcElem1.
vmvnCode = '''
    destElem = ~srcElem1;
'''
twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2971
# vqabs: absolute value with saturation.  When srcElem1 equals the value
# with only the top bit of Element set, the result is ~srcElem1 and
# fpscr.qc is set; otherwise negative inputs are negated and the rest are
# copied.  The (possibly updated) fpscr is always written back to FpscrQc.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)

# vqneg: negation with saturation.  Same special case as vqabs for the
# value with only the top bit set (result ~srcElem1, fpscr.qc set); every
# other input is simply negated.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2999
# vabs (integer): negate negative elements, copy the others.  No
# saturation handling is done here (contrast with vqabs).
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs (float): view the float through a uint32_t/float union and AND the
# integer view with mask(sizeof(Element) * 8 - 1) before reading it back.
# NOTE(review): mask(n) presumably yields the low n bits set, which would
# clear the sign bit -- confirm against the mask() helper.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)

# vneg (integer): plain arithmetic negation of each element.
vnegCode = '''
    destElem = -srcElem1;
'''
twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg (float): plain negation of the source register value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3033
# Compare-against-zero family (vcgt, vcge, vceq, vcle, vclt).
#
# Each integer form writes mask(sizeof(Element) * 8) to destElem when the
# comparison of srcElem1 with 0 holds, and 0 otherwise.
#
# Each floating-point form runs the matching vc??Func through binaryOp
# against (FloatReg)0.0, stores -1 when the result is 0 and 0 otherwise,
# sets fpscr.ioc when the result is 2.0, and writes fpscr back to FpscrExc.
# NOTE(review): the (res == 0) ? -1 : 0 mapping relies on the return
# convention of the vc??Func helpers, which are not defined in this
# section -- confirm against their definitions.

# vcgt: srcElem1 > 0
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgtfpCode, toInt = True)
twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgtfpCode, toInt = True)

# vcge: srcElem1 >= 0
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
                 2, vcgefpCode, toInt = True)
twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
                 4, vcgefpCode, toInt = True)

# vceq: srcElem1 == 0
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
                 2, vceqfpCode, toInt = True)
twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
                 4, vceqfpCode, toInt = True)

# vcle: srcElem1 <= 0
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
                 2, vclefpCode, toInt = True)
twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
                 4, vclefpCode, toInt = True)

# vclt: srcElem1 < 0
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
                 2, vcltfpCode, toInt = True)
twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
                 4, vcltfpCode, toInt = True)
3118
# vswp: exchange srcReg1 and destReg one register word at a time, using
# mid as the temporary.  Both operands are modified.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)

# vtrn: for every even index i, exchange srcReg1.elements[i] with
# destReg.elements[i + 1].  The other elements of each pair are untouched.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
                   smallUnsignedTypes, 2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
                   smallUnsignedTypes, 4, vtrnCode)
3142
# vuzp: de-interleave destReg and srcReg1 in place.  mid holds a copy of
# the original srcReg1.  After the code runs:
#   destReg = even-indexed elements of destReg, then even-indexed of mid
#   srcReg1 = odd-indexed elements of destReg, then odd-indexed of mid
# The first loop reads destReg.elements[2 * i] and [2 * i + 1] before
# writing destReg.elements[i], so no unread slot is overwritten.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3157
# vzip: interleave destReg and srcReg1 in place.  mid holds a copy of the
# original destReg.  After the code runs:
#   destReg = lower halves of mid and srcReg1, interleaved
#   srcReg1 = upper halves of mid and srcReg1, interleaved
# The second loop rewrites srcReg1 in place; iteration i writes indices
# 2 * i and 2 * i + 1 but reads index eCount / 2 + i, which is always
# beyond anything written so far, so no unread slot is overwritten.
# Both loops use an unsigned index, like every other element loop in this
# file, so the bound check against eCount / 2 does not mix signedness.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3172
# vmovn: plain element assignment, instantiated through the narrowing
# generator, so any narrowing comes from the element types the generator
# supplies rather than from this snippet.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)

# vdup (scalar source): plain element assignment; selecting the source
# scalar is left to twoRegMiscScInst.
vdupCode = 'destElem = srcElem1;'
twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3179
# Generate a vdup instruction class that copies the Op1 operand into every
# element of the destination vector.
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name; also used for the explicit instantiations
#   opClass - value substituted as "op_class"
#   types   - element types to emit NeonExecDeclare instantiations for
#   rCount  - number of destination register words to write back
# Appends to the global header_output and exec_output.
def vdupGprInst(name, Name, opClass, types, rCount):
    global header_output, exec_output
    # Fill a local vector with Op1 cast to the element type.
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register word.
    writeBackCode = ''.join('''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    ''' % { "reg" : regIdx } for regIdx in range(rCount))
    params = InstObjParams(name, Name,
                           "RegRegOp",
                           { "code": fillCode + writeBackCode,
                             "r_count": rCount,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    # Explicitly instantiate the execute method for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# vdup from the Op1 operand: D and Q classes (rCount 2 and 4).
vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)

# Immediate forms.  All are instantiated for uint64_t only.
# vmov: destination element is the immediate.
vmovCode = 'destElem = imm;'
oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)

# vorr: OR the immediate into the existing destination element.
# NOTE(review): the trailing True is presumably what makes oneRegImmInst
# load the old destination value (the code uses |=) -- confirm there.
vorrCode = 'destElem |= imm;'
oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)

# vmvn: destination element is the complement of the immediate.  This
# rebinds vmvnCode, which earlier held the register-form snippet.
vmvnCode = 'destElem = ~imm;'
oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)

# vbic: clear the immediate's bits in the existing destination element.
# Passes the same trailing True as vorr (the code uses &=).
vbicCode = 'destElem &= ~imm;'
oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3222
# vqmovn: saturating narrow.  srcElem1 is assigned to destElem; if
# converting destElem back to BigElement does not reproduce srcElem1, the
# value did not fit: fpscr.qc is set and destElem becomes
# mask(sizeof(Element) * 8 - 1), complemented when srcElem1 is negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)

# vqmovun over the unsigned types: same round-trip test, but an overflow
# yields mask(sizeof(Element) * 8) with no negative case.
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovun",
                     "SimdMiscOp", smallUnsignedTypes, vqmovunCode)

# vqmovun over the signed types: a negative source, or one whose low
# sizeof(Element) * 8 bits do not reproduce it, sets fpscr.qc.  The result
# is then mask(sizeof(Element) * 8), complemented when srcElem1 is
# negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
                     "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3262
# Generate a three-register-plus-immediate NEON instruction class (used
# for vext).
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name; also used for the explicit instantiations
#   opClass - value substituted as "op_class"
#   types   - element types to emit NeonExecDeclare instantiations for
#   rCount  - number of register words per operand to load and write back
#   op      - C++ snippet that computes destReg from srcReg1, srcReg2, imm
# Appends to the global header_output and exec_output.
def buildVext(name, Name, opClass, types, rCount, op):
    global header_output, exec_output
    # Declare the working vectors, then emit the SIMD-enabled check exactly
    # once.  It used to be appended inside the register loop below, which
    # repeated the same check rCount times in the generated code.
    # NOTE(review): assumes simdEnabledCheckCode (defined elsewhere) is a
    # side-effect-free guard, so one copy is equivalent to several.
    eWalkCode = '''
    RegVect srcReg1, srcReg2, destReg;
    ''' + simdEnabledCheckCode
    # Load both source operands one register word at a time.
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Write the result back one register word at a time.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # Explicitly instantiate the execute method for each element type
    # (elemType avoids shadowing the builtin "type").
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
3290
# vext: destReg.elements[i] comes from position i + imm of the sequence
# formed by srcReg1 followed by srcReg2.  If the index is still >= eCount
# after subtracting eCount, it lies past both sources and fault is set to
# an UndefinedInstruction (constructed with (false, mnemonic) when not in
# FullSystem mode).
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount) {
                if (FullSystem)
                    fault = new UndefinedInstruction;
                else
                    fault = new UndefinedInstruction(false, mnemonic);
            } else {
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    }
'''
buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3311
# Generate a table-lookup instruction class (vtbl or vtbx).
#   name    - mnemonic handed to InstObjParams
#   Name    - C++ class name
#   opClass - value substituted as "op_class"
#   length  - substituted with %d as the C++ constant "length"; the first
#             2 * length table register words are loaded from FpOp1P<n>_uw
#   isVtbl  - text substituted with %s as a C++ bool initializer; when it
#             is true, out-of-range indices produce 0, otherwise the
#             destination byte is left unchanged
# Appends to the global header_output, decoder_output and exec_output.
def buildVtbxl(name, Name, opClass, length, isVtbl):
    global header_output, decoder_output, exec_output
    # Working storage plus loads of the index vector (Op2) and of the
    # current destination value.
    setupCode = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # All eight table words are assigned: the first 2 * length come from
    # the Op1 registers, the remainder are zeroed.
    tableCode = ''.join(
        ('table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n'
         if regIdx < length * 2 else
         'table.regs[%(reg)d] = 0;\n') % { "reg" : regIdx }
        for regIdx in range(8))
    # Per-byte lookup followed by the write-back of the two result words.
    lookupCode = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    params = InstObjParams(name, Name,
                           "RegRegRegOp",
                           { "code": setupCode + tableCode + lookupCode,
                             "predicate_test": predicateTest,
                             "op_class": opClass }, [])
    header_output += RegRegRegOpDeclare.subst(params)
    decoder_output += RegRegRegOpConstructor.subst(params)
    exec_output += PredOpExecute.subst(params)
3365
# vtbl with table lengths 1-4.  isVtbl is given as the text "true" because
# buildVtbxl pastes it into the generated C++ with %(isVtbl)s.
buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

# vtbx with table lengths 1-4 ("false": out-of-range indices leave the
# destination byte unchanged).
buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3375}};