neon.isa (8782:10c9297e14d5) neon.isa (8795:0909f8ed7aa0)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
623
# Text accumulated by the generator functions in this block.
header_output = ""
exec_output = ""

# Element type name groups handed to the generators as "types".
# The "small" groups stop at 32 bits; the full groups add the 64 bit type.
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
signedTypes = smallSignedTypes + ("int64_t",)
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
# Unsigned names come first in the combined tuples.
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate an integer instruction whose two sources and destination
    all span rCount registers.

    The generated code copies the operand registers into RegVect
    temporaries, runs the C++ snippet "op" once per element (it sees
    srcElem1, srcElem2 and destElem) and copies destReg back out.

    readDest -- also load the destination registers and pre-set destElem
                from them before "op" runs.
    pairwise -- feed "op" adjacent element pairs taken from the
                concatenation of srcReg1 and srcReg2 instead of the i-th
                element of each source.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output
    regIndices = range(rCount)

    # Prologue: enable check, temporaries and operand loads.
    chunks = [simdEnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for idx in regIndices:
        regSubst = { "reg" : idx }
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regSubst)
        if readDest:
            chunks.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst)

    # Per-element walk.
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        readDestCode = ''
    if pairwise:
        elemLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        elemLoop = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    chunks.append(elemLoop % { "op" : op, "readDest" : readDestCode })

    # Epilogue: write the destination registers back.
    for idx in regIndices:
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
694
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a floating point instruction whose two sources and
    destination all span rCount registers.

    The generated code walks whole registers rather than elements: "op"
    runs once per register and sees srcReg1, srcReg2 and destReg.

    readDest -- also load the destination registers and pre-set destReg.
    pairwise -- feed "op" adjacent register pairs taken from the
                concatenation of srcRegs1 and srcRegs2.
    toInt    -- the destination is a RegVect of FloatRegBits instead of
                an array of FloatReg.

    NOTE(review): with readDest and toInt both set the generated code
    indexes the RegVect as destRegs[r] and reads FpDestP<n>.bits; confirm
    that combination is either unused or compiles as intended.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output

    # Prologue: enable check and temporaries.
    chunks = [simdEnabledCheckCode, '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    ''']
    if toInt:
        chunks.append('RegVect destRegs;\n')
    else:
        chunks.append('FloatVect destRegs;\n')

    # Operand (and optionally destination) loads.
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        chunks.append('''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regSubst)
        if not readDest:
            continue
        if toInt:
            chunks.append('''
            destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
            ''' % regSubst)
        else:
            chunks.append('''
            destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % regSubst)

    # Pieces of the per-register loop that depend on the flags.
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    else:
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
    loopSubst = { "op" : op,
                  "readDest" : readDestCode,
                  "destType" : destType,
                  "writeDest" : writeDest }
    if pairwise:
        chunks.append('''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % loopSubst)
    else:
        chunks.append('''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % loopSubst)

    # Epilogue: write the destination registers back.
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        if toInt:
            chunks.append('''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
            ''' % regSubst)
        else:
            chunks.append('''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % regSubst)

    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
779
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate an integer instruction whose operands may differ in width.

    Each of bigSrc1, bigSrc2 and bigDest selects, for its operand, a
    BigRegVect loaded from or stored to 4 registers instead of a RegVect
    using 2 registers. "op" runs once per element and sees srcElem1,
    srcElem2 and destElem; readDest pre-loads destElem from the
    destination registers.

    NOTE(review): srcElem2 is declared with src1Prefix rather than
    src2Prefix, so when only src1 is big the second operand is converted
    to BigElement before "op" sees it. Left unchanged because the "op"
    snippets may rely on it; confirm whether this is intentional.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output

    # Register count and type-name prefix for each operand.
    src1Cnt, src1Prefix = (4, 'Big') if bigSrc1 else (2, '')
    src2Cnt, src2Prefix = (4, 'Big') if bigSrc2 else (2, '')
    destCnt, destPrefix = (4, 'Big') if bigDest else (2, '')

    chunks = [simdEnabledCheckCode, '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)]

    # Operand (and optionally destination) loads.
    for idx in range(src1Cnt):
        chunks.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : idx })
    for idx in range(src2Cnt):
        chunks.append('''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : idx })
    readDestCode = ''
    if readDest:
        for idx in range(destCnt):
            chunks.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : idx })
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    # Per-element walk.
    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix })

    # Epilogue: write the destination registers back.
    for idx in range(destCnt):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": ''.join(chunks),
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """Unequal-width instruction: both sources big, destination normal."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Unequal-width instruction: both sources normal, destination big."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """Unequal-width instruction: first source and destination big,
    second source normal."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate an integer instruction combining every element of the
    first source with one element of the second source chosen by imm.

    Both sources and the destination span rCount registers. "op" runs
    once per element and sees srcElem1 (element i of srcReg1), srcElem2
    (element imm of srcReg2) and destElem; readDest pre-loads destElem
    from the destination registers.

    The generated code raises UndefinedInstruction when imm does not name
    an element (imm < 0 or imm >= eCount). The check used to join the two
    conditions with "&&", which can never be true, so an out of range imm
    indexed past the end of srcReg2.elements.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Reject an element index outside [0, eCount) before using it.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
906
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Generate an integer instruction combining every element of the
    first source with one element of the second source chosen by imm,
    writing a double-width destination.

    The sources are RegVects loaded from 2 registers each and the
    destination is a BigRegVect covering 4 registers. "op" runs once per
    element and sees srcElem1, srcElem2 (element imm of srcReg2) and a
    BigElement destElem; readDest pre-loads destElem from the destination
    registers.

    The generated code raises UndefinedInstruction when imm does not name
    an element (imm < 0 or imm >= eCount). The check used to join the two
    conditions with "&&", which can never be true, so an out of range imm
    indexed past the end of srcReg2.elements.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        # The destination is twice as wide as a source.
        for reg in range(2 * rCount):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # Reject an element index outside [0, eCount) before using it.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
960
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Generate a floating point instruction combining every register of
    the first source with one register of the second source chosen by
    imm.

    Both sources and the destination are arrays of rCount FloatRegs.
    "op" runs once per register and sees srcReg1 (srcRegs1[i]), srcReg2
    (srcRegs2[imm]) and destReg; readDest pre-loads destReg from the
    destination registers.

    The generated code raises UndefinedInstruction when imm is out of
    range (imm < 0 or imm >= eCount). The check used to join the two
    conditions with "&&", which can never be true, so an out of range imm
    indexed past the end of srcRegs2.
    NOTE(review): the bound is eCount while srcRegs2 holds rCount
    entries; presumably these are equal for the float element types this
    is instantiated with -- confirm against the execute template.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
            destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # Reject an index outside [0, eCount) before using it.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1012
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a one-source register/immediate instruction whose source
    and destination both span rCount registers.

    "op" runs once per loop iteration. By default it sees srcElem1 and
    destElem (per element access).

    readDest -- also load the destination registers and pre-set the
                destination value before "op" runs.
    toInt    -- "op" writes a FloatRegBits destReg, stored to
                destRegs.regs[i], instead of destElem.
    fromInt  -- "op" reads a FloatRegBits srcReg1 taken from
                srcRegs1.regs[i] instead of srcElem1.

    NOTE(review): when toInt is set, destReg is pre-loaded from
    destRegs.regs[i] whether or not readDest is set; with readDest clear
    that reads a register that was never loaded. The source's indentation
    was lost, so confirm the intended nesting of the toInt override.

    The declaration is appended to header_output; the execute method and
    one instantiation per entry of "types" are appended to exec_output.
    """
    global header_output, exec_output

    # Prologue: enable check, temporaries and operand loads.
    chunks = [simdEnabledCheckCode, '''
    RegVect srcRegs1, destRegs;
    ''']
    for idx in range(rCount):
        regSubst = { "reg" : idx }
        chunks.append('''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst)
        if readDest:
            chunks.append('''
            destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst)

    # Pick the loop pieces; later assignments override earlier ones.
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
    if toInt:
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        declDest = 'FloatRegBits destReg;'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    chunks.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode })

    # Epilogue: write the destination registers back.
    for idx in range(rCount):
        chunks.append('''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : idx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(chunks),
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1069
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON narrowing shift-by-immediate instruction.

    Generates a RegRegImmOp class Name (mnemonic name). The execute code
    loads four 32-bit source register pieces into a BigRegVect, runs the
    C++ snippet op on each BigElement "srcElem1" to produce an Element
    "destElem", and writes two 32-bit destination pieces back. With
    readDest set, the old destination is loaded first and passed to op
    through destElem. One explicit instantiation is appended per entry
    of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for srcIdx in range(4):
        fragments.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : srcIdx })

    if readDest:
        for dstIdx in range(2):
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : dstIdx })
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    # Element walk: one narrow result per wide source element.
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for dstIdx in range(2):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : dstIdx })

    instParams = InstObjParams(name, Name, "RegRegImmOp",
                               { "code": ''.join(fragments),
                                 "r_count": 2,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(instParams)
    exec_output += NeonUnequalRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1113
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON lengthening shift-by-immediate instruction.

    Generates a RegRegImmOp class Name (mnemonic name). The execute code
    loads two 32-bit source register pieces into a RegVect, runs the C++
    snippet op on each Element "srcElem1" to produce a BigElement
    "destElem", and writes four 32-bit destination pieces back. With
    readDest set, the old destination is loaded first and passed to op
    through destElem. One explicit instantiation is appended per entry
    of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
        # op and the write-back in the loop both work on destElem, so the
        # old value has to land there rather than in the vector destReg.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1157
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register miscellaneous instruction.

    Generates a RegRegOp class Name (mnemonic name). The execute code
    loads rCount 32-bit source pieces, runs the C++ snippet op on each
    Element "srcElem1" to produce "destElem", and stores the result at
    destination index j. j starts out equal to the loop index i and is
    visible to op. With readDest set the old destination is loaded and
    passed to op through destElem. One explicit instantiation is
    appended per entry of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for pieceIdx in range(rCount):
        fragments.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : pieceIdx })
        if readDest:
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : pieceIdx })

    if readDest:
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for pieceIdx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : pieceIdx })

    instParams = InstObjParams(name, Name, "RegRegOp",
                               { "code": ''.join(fragments),
                                 "r_count": rCount,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(instParams)
    exec_output += NeonEqualRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1200
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register instruction with a scalar source.

    Generates a RegRegImmOp class Name (mnemonic name). The execute code
    loads rCount 32-bit source pieces and, for every destination element,
    feeds op the single source element selected by the immediate
    (srcReg1.elements[imm]) as "srcElem1"; op fills in "destElem". With
    readDest set the old destination is loaded and passed to op through
    destElem. One explicit instantiation is appended per entry of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    ''']
    for pieceIdx in range(rCount):
        fragments.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : pieceIdx })
        if readDest:
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : pieceIdx })

    if readDest:
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    # The source index is imm, not i: the same element is used each time.
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for pieceIdx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : pieceIdx })

    instParams = InstObjParams(name, Name, "RegRegImmOp",
                               { "code": ''.join(fragments),
                                 "r_count": rCount,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(instParams)
    exec_output += NeonEqualRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1242
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction that rearranges two registers in place.

    Generates a RegRegOp class Name (mnemonic name). Unlike the
    element-walking helpers, op is inserted verbatim and is expected to
    operate on the whole vectors "srcReg1" and "destReg" itself. Both
    vectors are always loaded from the register file (rCount 32-bit
    pieces each) before op runs, and both are written back afterwards.
    One explicit instantiation is appended per entry of types.

    readDest is accepted for signature compatibility with the other
    helpers but has no effect: the destination is loaded unconditionally.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # The operand register is written too, since op may modify srcReg1.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1277
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit a NEON two-register floating point miscellaneous instruction.

    Generates an FpRegRegOp class Name (mnemonic name). The execute code
    copies rCount FloatReg source values into an array, runs the C++
    snippet op once per register with "srcReg1" as input and "destReg"
    as output, and writes rCount destination registers back. One
    explicit instantiation is appended per entry of types.

    readDest -- load the previous destination contents and hand them to
                op through destReg.
    toInt    -- the destination is raw bits: destReg is a FloatRegBits
                and the destination vector is a RegVect accessed through
                its regs member, instead of an array of FloatReg.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    # Choose every destination access in one place so that the load, the
    # per-register read, the per-register write and the store all agree
    # on how destRegs is laid out.
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        loadDest = '''
            destRegs.regs[%(reg)d] = FpDestP%(reg)d_uw;
        '''
        readDestCode = 'destReg = destRegs.regs[r];'
        writeDest = 'destRegs.regs[r] = destReg;'
        storeDest = '''
            FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        '''
    else:
        eWalkCode += 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        loadDest = '''
            destRegs[%(reg)d] = FpDestP%(reg)d;
        '''
        readDestCode = 'destReg = destRegs[r];'
        writeDest = 'destRegs[r] = destReg;'
        storeDest = '''
            FpDestP%(reg)d = destRegs[%(reg)d];
        '''
    if not readDest:
        readDestCode = ''
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += loadDest % { "reg" : reg }
    # The generated loop counts registers with r, so the snippets above
    # must index with r as well.
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        eWalkCode += storeDest % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1343
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction that folds adjacent element pairs.

    Generates a RegRegOp class Name (mnemonic name). The execute code
    loads rCount 32-bit source pieces and walks eCount / 2 destination
    elements; for each one op receives source elements 2*i and 2*i+1 as
    "srcElem1" and "srcElem2" and fills in a BigElement "destElem".
    rCount destination pieces are written back. With readDest set the
    old destination is loaded and passed to op through destElem. One
    explicit instantiation is appended per entry of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for pieceIdx in range(rCount):
        fragments.append('''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : pieceIdx })
        if readDest:
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : pieceIdx })

    if readDest:
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    fragments.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for pieceIdx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : pieceIdx })

    instParams = InstObjParams(name, Name, "RegRegOp",
                               { "code": ''.join(fragments),
                                 "r_count": rCount,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(instParams)
    exec_output += NeonUnequalRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1387
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON narrowing two-register miscellaneous instruction.

    Generates a RegRegOp class Name (mnemonic name). The execute code
    loads four 32-bit source pieces into a BigRegVect, runs the C++
    snippet op on each BigElement "srcElem1" to produce an Element
    "destElem", and writes two 32-bit destination pieces back. With
    readDest set the old destination is loaded and passed to op through
    destElem. One explicit instantiation is appended per entry of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for srcIdx in range(4):
        fragments.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : srcIdx })

    if readDest:
        for dstIdx in range(2):
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : dstIdx })
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for dstIdx in range(2):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : dstIdx })

    instParams = InstObjParams(name, Name, "RegRegOp",
                               { "code": ''.join(fragments),
                                 "r_count": 2,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(instParams)
    exec_output += NeonUnequalRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1431
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON one-register-plus-immediate instruction.

    Generates a RegImmOp class Name (mnemonic name). There is no source
    register: for each of the eCount elements the C++ snippet op fills
    in "destElem", and rCount 32-bit destination pieces are written
    back. With readDest set the old destination is loaded and passed to
    op through destElem. One explicit instantiation is appended per
    entry of types.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode + '''
    RegVect destReg;
    ''']

    if readDest:
        for pieceIdx in range(rCount):
            fragments.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : pieceIdx })
        oldDestCode = 'destElem = gtoh(destReg.elements[i]);'
    else:
        oldDestCode = ''

    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "readDest" : oldDestCode, "op" : op })

    for pieceIdx in range(rCount):
        fragments.append('''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : pieceIdx })

    instParams = InstObjParams(name, Name, "RegImmOp",
                               { "code": ''.join(fragments),
                                 "r_count": rCount,
                                 "predicate_test": predicateTest,
                                 "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(instParams)
    exec_output += NeonEqualRegExecute.subst(instParams)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1469
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON lengthening two-register miscellaneous instruction.

    Generates a RegRegOp class Name (mnemonic name). The execute code
    loads two 32-bit source pieces into a RegVect, runs the C++ snippet
    op on each Element "srcElem1" to produce a BigElement "destElem",
    and writes four 32-bit destination pieces back. With readDest set
    the old destination is loaded and passed to op through destElem.
    One explicit instantiation is appended per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
        # op and the write-back in the loop both work on destElem, so the
        # old value has to land there rather than in the vector destReg.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1513
# Halving add: (srcElem1 + srcElem2) / 2 without needing a wider type.
# Each input is halved separately and the carry out of the two discarded
# low bits is added back in.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

# Rounding halving add: same as vhadd, but an extra 1 is added to the sum
# of the low bits before it is halved.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

# Halving subtract: the halves are subtracted and the borrow produced by
# the two discarded low bits is taken off the result.
vhsubCode = '''
    Element borrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Bitwise logical instructions. Each one is registered twice through
# threeEqualRegInst: a "D" class with count argument 2 followed by a "Q"
# class with count argument 4.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vorrCode)

# vmov shares the OR snippet with vorr; only the names and the op class
# differ.
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + vecForm, "SimdMiscOp",
                      unsignedTypes, vecRegs, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, veorCode)

# The three bit-select instructions use the incoming destElem as an
# input, and pass an extra True argument (presumably readDest -- the
# definition of threeEqualRegInst is not visible here).
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + vecForm, "SimdAluOp",
                      unsignedTypes, vecRegs, vbslCode, True)
1600
# Element-wise maximum, minimum and integer add. Each instruction is
# registered as a "D" class with count argument 2 followed by a "Q"
# class with count argument 4.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + vecForm, "SimdCmpOp",
                      allTypes, vecRegs, vmaxCode)

vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + vecForm, "SimdCmpOp",
                      allTypes, vecRegs, vminCode)

# The add classes carry an "N" prefix: NVaddD / NVaddQ.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
for vecForm, vecRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vadd", "NVadd" + vecForm, "SimdAddOp",
                      unsignedTypes, vecRegs, vaddCode)
1618
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
623
# Text accumulated by the generator functions below.
header_output = ""
exec_output = ""

# Element type names. The "small" tuples leave out the 64-bit entry.
unsignedTypes = ("uint8_t", "uint16_t", "uint32_t", "uint64_t")
signedTypes = ("int8_t", "int16_t", "int32_t", "int64_t")
smallUnsignedTypes = unsignedTypes[:3]
smallSignedTypes = signedTypes[:3]
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    """Emit a RegRegRegOp class whose sources and destination each span
    rCount registers.

    op is C++ text placed inside the per-element loop; it sees srcElem1,
    srcElem2 and destElem.  With readDest the destination registers are
    loaded and destElem is initialised from them before op runs.  With
    pairwise the operands of element i are elements 2*i and 2*i+1 of the
    first source followed by the second source; otherwise they are
    element i of each source.  One NeonExecDeclare expansion is appended
    to exec_output per entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % {"reg": idx}
        if readDest:
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    # Pick the loop template first, then fill it in once.
    if pairwise:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i] :
                                srcReg2.elements[2 * i - eCount]);
        Element srcElem2 = gtoh(2 * i < eCount ?
                                srcReg1.elements[2 * i + 1] :
                                srcReg2.elements[2 * i + 1 - eCount]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    else:
        loopTemplate = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    '''
    walk += loopTemplate % {"op": op, "readDest": loadDestElem}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegRegOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
694
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Emit an FpRegRegRegOp class that works on whole FloatReg values.

    op is C++ text placed inside the per-register loop; it sees srcReg1,
    srcReg2 and destReg.  With toInt the destination is a RegVect whose
    entries are written as FloatRegBits; otherwise it is an array of
    FloatReg.  With readDest the destination registers are loaded and
    destReg is initialised from them before op runs.  With pairwise the
    operands of register r are registers 2*r and 2*r+1 of the first
    source followed by the second source.  One NeonExecDeclare expansion
    is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    destType = 'FloatReg'
    readDestCode = ''
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        # destRegs is a RegVect here, so the read has to go through .regs
        # exactly like the write does.
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    if pairwise:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    else:
        eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
779
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Emit a RegRegRegOp class whose operands may differ in width.

    Each of bigSrc1, bigSrc2 and bigDest selects, for that operand, the
    BigRegVect/BigElement types and four registers instead of the
    RegVect/Element types and two registers.  op is C++ text placed
    inside the per-element loop; it sees srcElem1, srcElem2 and destElem.
    With readDest the destination registers are loaded and destElem is
    initialised from them before op runs.  One NeonExecDeclare expansion
    is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 takes its element type from the second source's own prefix
    # so that it matches the declaration of srcReg2 above; it used to reuse
    # the first source's prefix.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    """threeUnequalRegInst with both sources big and a normal destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    """threeUnequalRegInst with normal sources and a big destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    """threeUnequalRegInst with a big first source, a normal second source
    and a big destination."""
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegRegRegImmOp class that combines every element of the
    first source with one element of the second source selected by imm.

    op is C++ text placed inside the per-element loop; it sees srcElem1,
    srcElem2 (always element imm of the second source) and destElem.
    With readDest the destination registers are loaded and destElem is
    initialised from them before op runs.  The generated code raises an
    UndefinedInstruction fault instead of running the loop when imm is
    not a valid element index.  One NeonExecDeclare expansion is appended
    to exec_output per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && it could never be true, so an
    # out-of-range imm indexed past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
906
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    """Emit a RegRegRegImmOp class with two-register sources and a
    four-register BigRegVect destination, where the second operand is
    the single element of the second source selected by imm.

    op is C++ text placed inside the per-element loop; it sees srcElem1,
    srcElem2 (always element imm of the second source) and a BigElement
    destElem.  With readDest the destination registers are loaded and
    destElem is initialised from them before op runs.  The generated
    code raises an UndefinedInstruction fault instead of running the loop
    when imm is not a valid element index.  One NeonExecDeclare expansion
    is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The range check must use ||: with && it could never be true, so an
    # out-of-range imm indexed past the end of srcReg2.elements.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
960
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit an FpRegRegRegImmOp class that combines every register of the
    first source with one register of the second source selected by imm.

    op is C++ text placed inside the per-register loop; it sees srcReg1,
    srcReg2 (always entry imm of the second source) and destReg.  With
    readDest the destination registers are loaded and destReg is
    initialised from them before op runs.  The generated code raises an
    UndefinedInstruction fault instead of running the loop when imm fails
    the range check.  One NeonExecDeclare expansion is appended to
    exec_output per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The range check must use ||: with && it could never be true, so an
    # out-of-range imm indexed past the end of srcRegs2.
    # NOTE(review): srcRegs2 holds rCount entries while the check uses
    # eCount; presumably the two are equal for the element types used
    # here - confirm against the execute template.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
        if (FullSystem)
            fault = new UndefinedInstruction;
        else
            fault = new UndefinedInstruction(false, mnemonic);
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1012
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Emit a RegRegImmOp class with one source and a destination that
    each span rCount registers.

    op is C++ text placed inside the per-element loop.  The source is
    presented as an Element named srcElem1, or as a FloatRegBits named
    srcReg1 when fromInt is set.  The destination is an Element named
    destElem, or a FloatRegBits named destReg when toInt is set.  With
    readDest the destination registers are loaded and the destination
    variable is initialised from them before op runs.  One NeonExecDeclare
    expansion is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    # Destination handling comes in two flavours; choose all three
    # snippets together so they stay in step.
    if toInt:
        declDest = 'FloatRegBits destReg;'
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
    if not readDest:
        readDestCode = ''
    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for idx in range(rCount):
        walk += '''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
        if readDest:
            walk += '''
        destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % {"readOp": readOpCode,
           "declDest": declDest,
           "readDest": readDestCode,
           "op": op,
           "writeDest": writeDestCode}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1069
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a RegRegImmOp class with a four-register BigRegVect source
    and a two-register RegVect destination.

    op is C++ text placed inside the per-element loop; it sees a
    BigElement srcElem1 and an Element destElem.  With readDest the
    destination registers are loaded and destElem is initialised from
    them before op runs.  One NeonExecDeclare expansion is appended to
    exec_output per entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(4):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
    if readDest:
        for idx in range(2):
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(2):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": walk,
                            "r_count": 2,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1113
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a RegRegImmOp class with a two-register RegVect source and a
    four-register BigRegVect destination.

    op is C++ text placed inside the per-element loop; it sees an Element
    srcElem1 and a BigElement destElem.  With readDest the destination
    registers are loaded and destElem is initialised from them before op
    runs.  One NeonExecDeclare expansion is appended to exec_output per
    entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element variable is destElem; destReg is the register
        # union it is read from, so it must not be the assignment target.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1157
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegRegOp class with one source and a destination that each
    span rCount registers.

    op is C++ text placed inside the per-element loop; it sees srcElem1,
    destElem and an index j that starts out equal to i and selects the
    destination element that is written.  With readDest the destination
    registers are loaded and destElem is initialised from them before op
    runs.  One NeonExecDeclare expansion is appended to exec_output per
    entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
        if readDest:
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1200
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegRegImmOp class whose only operand is the single source
    element selected by imm.

    op is C++ text placed inside the per-element loop; it sees srcElem1
    (always element imm of the source) and destElem.  With readDest the
    destination registers are loaded and destElem is initialised from
    them before op runs.  One NeonExecDeclare expansion is appended to
    exec_output per entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
        if readDest:
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegImmOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1242
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegRegOp class that loads both the source and the
    destination registers, runs op, and writes both register sets back.

    op is C++ text inserted verbatim between the loads and the stores (it
    is not wrapped in a per-element loop); it operates on srcReg1 and
    destReg directly.  readDest is accepted so the signature matches the
    sibling generators but has no effect: the destination is always
    loaded.  One NeonExecDeclare expansion is appended to exec_output per
    entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    # Both register sets are stored because op may have modified either.
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1277
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit an FpRegRegOp class that works on whole FloatReg values with
    a single source.

    op is C++ text placed inside the per-register loop; it sees srcReg1
    and destReg.  With toInt the destination is a RegVect whose entries
    are written as FloatRegBits; otherwise it is an array of FloatReg.
    With readDest the destination registers are loaded and destReg is
    initialised from them before op runs.  One NeonExecDeclare expansion
    is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        ''' % { "reg" : reg }
            else:
                eWalkCode += '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        ''' % { "reg" : reg }
    destType = 'FloatReg'
    readDestCode = ''
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        # The generated loop below iterates with r, not i.
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        # destRegs is a RegVect here, so the read has to go through .regs
        # exactly like the write does.
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        ''' % { "reg" : reg }
        else:
            eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1343
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegRegOp class that combines adjacent pairs of source
    elements into one BigElement each.

    op is C++ text placed inside the loop, which runs eCount / 2 times;
    it sees srcElem1 and srcElem2 (source elements 2*i and 2*i+1) and a
    BigElement destElem.  With readDest the destination registers are
    loaded and destElem is initialised from them before op runs.  One
    NeonExecDeclare expansion is appended to exec_output per entry of
    types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for idx in range(rCount):
        walk += '''
        srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
        if readDest:
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1387
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a RegRegOp class with a four-register BigRegVect source and a
    two-register RegVect destination.

    op is C++ text placed inside the per-element loop; it sees a
    BigElement srcElem1 and an Element destElem.  With readDest the
    destination registers are loaded and destElem is initialised from
    them before op runs.  One NeonExecDeclare expansion is appended to
    exec_output per entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(4):
        walk += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % {"reg": idx}
    if readDest:
        for idx in range(2):
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(2):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegRegOp",
                           {"code": walk,
                            "r_count": 2,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegRegOpDeclare.subst(params)
    exec_output += NeonUnequalRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1431
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a RegImmOp class that has a destination spanning rCount
    registers and no register source.

    op is C++ text placed inside the per-element loop; it sees destElem.
    With readDest the destination registers are loaded and destElem is
    initialised from them before op runs.  One NeonExecDeclare expansion
    is appended to exec_output per entry of types.
    """
    global header_output, exec_output
    loadDestElem = 'destElem = gtoh(destReg.elements[i]);' if readDest else ''
    walk = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    if readDest:
        for idx in range(rCount):
            walk += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % {"reg": idx}
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": loadDestElem}
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % {"reg": idx}
    params = InstObjParams(name, Name,
                           "RegImmOp",
                           {"code": walk,
                            "r_count": rCount,
                            "predicate_test": predicateTest,
                            "op_class": opClass}, [])
    header_output += NeonRegImmOpDeclare.subst(params)
    exec_output += NeonEqualRegExecute.subst(params)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1469
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a RegRegOp class with a two-register RegVect source and a
    four-register BigRegVect destination.

    op is C++ text placed inside the per-element loop; it sees an Element
    srcElem1 and a BigElement destElem.  With readDest the destination
    registers are loaded and destElem is initialised from them before op
    runs.  One NeonExecDeclare expansion is appended to exec_output per
    entry of types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
        ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # The per-element variable is destElem; destReg is the register
        # union it is read from, so it must not be the assignment target.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1513
# Per-element body for the halving add: each source has its low bit cleared
# and is divided by two, the halves are summed, and the carry produced by
# the two low bits is added back.  Instantiated as VhaddD and VhaddQ over
# allTypes.
1514 vhaddCode = '''
1515 Element carryBit =
1516 (((unsigned)srcElem1 & 0x1) +
1517 ((unsigned)srcElem2 & 0x1)) >> 1;
1518 // Use division instead of a shift to ensure the sign extension works
1519 // right. The compiler will figure out if it can be a shift. Mask the
1520 // inputs so they get truncated correctly.
1521 destElem = (((srcElem1 & ~(Element)1) / 2) +
1522 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1523 '''
1524 threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1525 threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
# Per-element body for the rounding halving add: same shape as the halving
# add, but an extra 1 is added to the two low bits before the carry is
# extracted.  Instantiated as VrhaddD and VrhaddQ over allTypes.
1527 vrhaddCode = '''
1528 Element carryBit =
1529 (((unsigned)srcElem1 & 0x1) +
1530 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531 // Use division instead of a shift to ensure the sign extension works
1532 // right. The compiler will figure out if it can be a shift. Mask the
1533 // inputs so they get truncated correctly.
1534 destElem = (((srcElem1 & ~(Element)1) / 2) +
1535 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1536 '''
1537 threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1538 threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# Per-element body for the halving subtract: the halves of the two sources
# (low bit cleared first) are subtracted, then a borrow derived from the
# difference of the low bits is subtracted as well.  The local is spelled
# "barrowBit" in the template; it holds the borrow.  Instantiated as VhsubD
# and VhsubQ over allTypes.
1540 vhsubCode = '''
1541 Element barrowBit =
1542 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543 // Use division instead of a shift to ensure the sign extension works
1544 // right. The compiler will figure out if it can be a shift. Mask the
1545 // inputs so they get truncated correctly.
1546 destElem = (((srcElem1 & ~(Element)1) / 2) -
1547 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1548 '''
1549 threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1550 threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Bitwise AND of the two source elements; instantiated as VandD and VandQ
# over unsignedTypes.
1552 vandCode = '''
1553 destElem = srcElem1 & srcElem2;
1554 '''
1555 threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1556 threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1557
# Bit clear: first source ANDed with the complement of the second.
# Instantiated as VbicD and VbicQ over unsignedTypes.
1558 vbicCode = '''
1559 destElem = srcElem1 & ~srcElem2;
1560 '''
1561 threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562 threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1563
# Bitwise OR of the two source elements; instantiated as VorrD and VorrQ
# over unsignedTypes.
1564 vorrCode = '''
1565 destElem = srcElem1 | srcElem2;
1566 '''
1567 threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568 threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1569
# The register-to-register vmov classes reuse the OR body, with the
# SimdMiscOp op class instead of SimdAluOp.
1570 threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1571 threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1572
# OR-NOT: first source ORed with the complement of the second.
# Instantiated as VornD and VornQ over unsignedTypes.
1573 vornCode = '''
1574 destElem = srcElem1 | ~srcElem2;
1575 '''
1576 threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577 threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1578
# Bitwise exclusive OR of the two source elements; instantiated as VeorD
# and VeorQ over unsignedTypes.
1579 veorCode = '''
1580 destElem = srcElem1 ^ srcElem2;
1581 '''
1582 threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583 threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# Bitwise select family.  All three pass True as the trailing argument
# because their bodies read destElem as an input.
#
# vbif: keep destination bits where srcElem2 is 1, take srcElem1 bits where
# srcElem2 is 0.
1585 vbifCode = '''
1586 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1587 '''
1588 threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1589 threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: take srcElem1 bits where srcElem2 is 1, keep destination bits where
# srcElem2 is 0.
1590 vbitCode = '''
1591 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1592 '''
1593 threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594 threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination value is the selector; srcElem1 bits where it is
# 1, srcElem2 bits where it is 0.
1595 vbslCode = '''
1596 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1597 '''
1598 threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599 threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# Element-wise maximum (srcElem2 is chosen on ties); instantiated as VmaxD
# and VmaxQ over allTypes.
1601 vmaxCode = '''
1602 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1603 '''
1604 threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1605 threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1606
# Element-wise minimum (srcElem2 is chosen on ties); instantiated as VminD
# and VminQ over allTypes.
1607 vminCode = '''
1608 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1609 '''
1610 threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611 threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# Plain element-wise add.  Used for the vadd classes (NVaddD, NVaddQ) and,
# with pairwise=True, for the vpadd classes.
1613 vaddCode = '''
1614 destElem = srcElem1 + srcElem2;
1615 '''
1616 threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617 threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
# NOTE(review): the NVpaddD call below is present twice, once with
# unsignedTypes and once with smallUnsignedTypes, and its continuation line
# is repeated as well.  This looks like two revisions of the same statement
# interleaved; confirm which type list is intended and whether the NVpaddQ
# call should remain.
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1619 threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620 2, vaddCode, pairwise=True)
1620 2, vaddCode, pairwise=True)
1621 threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1622 4, vaddCode, pairwise=True)
# Widening add: both operands are cast to BigElement before the sum.  The
# same body serves the long (Vaddl) and wide (Vaddw) generators.
1623 vaddlwCode = '''
1624 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1625 '''
1626 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1627 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Add and keep the high half: the BigElement sum is shifted right by the
# bit width of Element.
1628 vaddhnCode = '''
1629 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1630 (sizeof(Element) * 8);
1631 '''
1632 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# Rounding variant: 1 << (Element bits - 1) is added before the shift.
1633 vraddhnCode = '''
1634 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1635 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1636 (sizeof(Element) * 8);
1637 '''
1638 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1639
# Plain element-wise subtract; instantiated as NVsubD and NVsubQ over
# unsignedTypes.
1640 vsubCode = '''
1641 destElem = srcElem1 - srcElem2;
1642 '''
1643 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1644 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Widening subtract: both operands are cast to BigElement first.  Shared by
# the long (Vsubl) and wide (Vsubw) generators.
1645 vsublwCode = '''
1646 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1647 '''
1648 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1649 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1650
# Unsigned saturating add: if the sum is smaller than either source (i.e. it
# wrapped), the result becomes all ones and fpscr.qc is set.  FpscrQc is
# read into a local FPSCR and written back at the end.
1651 vqaddUCode = '''
1652 destElem = srcElem1 + srcElem2;
1653 FPSCR fpscr = (FPSCR) FpscrQc;
1654 if (destElem < srcElem1 || destElem < srcElem2) {
1655 destElem = (Element)(-1);
1656 fpscr.qc = 1;
1657 }
1658 FpscrQc = fpscr;
1659 '''
1660 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1661 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Subtract and keep the high half: the BigElement difference is shifted
# right by the bit width of Element.
1662 vsubhnCode = '''
1663 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1664 (sizeof(Element) * 8);
1665 '''
1666 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# Rounding variant: 1 << (Element bits - 1) is added before the shift.
1667 vrsubhnCode = '''
1668 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1669 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1670 (sizeof(Element) * 8);
1671 '''
1672 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1673
# Signed saturating add.  Overflow is detected when both sources have the
# same sign and the sum's sign differs.  The result is then replaced by
# 1 << (bits - 1), minus one when the wrapped sum was negative, and
# fpscr.qc is set.
1674 vqaddSCode = '''
1675 destElem = srcElem1 + srcElem2;
1676 FPSCR fpscr = (FPSCR) FpscrQc;
1677 bool negDest = (destElem < 0);
1678 bool negSrc1 = (srcElem1 < 0);
1679 bool negSrc2 = (srcElem2 < 0);
1680 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1681 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1682 if (negDest)
1683 destElem -= 1;
1684 fpscr.qc = 1;
1685 }
1686 FpscrQc = fpscr;
1687 '''
1688 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1689 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1690
# Unsigned saturating subtract: if the difference is larger than srcElem1
# (i.e. it wrapped), the result becomes 0 and fpscr.qc is set.
1691 vqsubUCode = '''
1692 destElem = srcElem1 - srcElem2;
1693 FPSCR fpscr = (FPSCR) FpscrQc;
1694 if (destElem > srcElem1) {
1695 destElem = 0;
1696 fpscr.qc = 1;
1697 }
1698 FpscrQc = fpscr;
1699 '''
1700 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1701 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1702
# Signed saturating subtract.  Overflow is detected when the sources have
# opposite signs (negSrc1 == posSrc2) and the difference's sign differs from
# srcElem1's.  The result is then replaced by 1 << (bits - 1), minus one
# when the wrapped difference was negative, and fpscr.qc is set.
1703 vqsubSCode = '''
1704 destElem = srcElem1 - srcElem2;
1705 FPSCR fpscr = (FPSCR) FpscrQc;
1706 bool negDest = (destElem < 0);
1707 bool negSrc1 = (srcElem1 < 0);
1708 bool posSrc2 = (srcElem2 >= 0);
1709 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1710 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1711 if (negDest)
1712 destElem -= 1;
1713 fpscr.qc = 1;
1714 }
1715 FpscrQc = fpscr;
1716 '''
1717 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1718 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1719
# Compare greater-than: all ones when srcElem1 > srcElem2, otherwise 0.
# Instantiated as VcgtD and VcgtQ over allTypes.
1720 vcgtCode = '''
1721 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1722 '''
1723 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1724 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1725
# Compare greater-or-equal: all ones when srcElem1 >= srcElem2, otherwise 0.
# Instantiated as VcgeD and VcgeQ over allTypes.
1726 vcgeCode = '''
1727 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1728 '''
1729 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1730 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1731
# Compare equal: all ones when the sources match, otherwise 0.
# Instantiated as VceqD and VceqQ over unsignedTypes.
1732 vceqCode = '''
1733 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1734 '''
1735 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1736 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1737
# Shift by a per-element register amount.  The amount is the low byte of
# srcElem2 read as signed: negative values shift right by the magnitude,
# others shift left.  A magnitude of at least the element width yields 0
# before the sign fix-up.  After a right shift of a negative source
# (per ltz()) the upper bits are ORed in by hand so the result is
# sign-extended.
1738 vshlCode = '''
1739 int16_t shiftAmt = (int8_t)srcElem2;
1740 if (shiftAmt < 0) {
1741 shiftAmt = -shiftAmt;
1742 if (shiftAmt >= sizeof(Element) * 8) {
1743 shiftAmt = sizeof(Element) * 8 - 1;
1744 destElem = 0;
1745 } else {
1746 destElem = (srcElem1 >> shiftAmt);
1747 }
1748 // Make sure the right shift sign extended when it should.
1749 if (ltz(srcElem1) && !ltz(destElem)) {
1750 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1751 1 - shiftAmt));
1752 }
1753 } else {
1754 if (shiftAmt >= sizeof(Element) * 8) {
1755 destElem = 0;
1756 } else {
1757 destElem = srcElem1 << shiftAmt;
1758 }
1759 }
1760 '''
1761 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1762 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1763
# Rounding shift by a per-element register amount.  Same structure as the
# plain register shift, but on a right shift the last bit shifted out
# (bit shiftAmt - 1 of the source) is captured in rBit and added to the
# result.  For magnitudes above the element width rBit is forced to 1 when
# the source is negative.  A zero amount copies the source unchanged.
1764 vrshlCode = '''
1765 int16_t shiftAmt = (int8_t)srcElem2;
1766 if (shiftAmt < 0) {
1767 shiftAmt = -shiftAmt;
1768 Element rBit = 0;
1769 if (shiftAmt <= sizeof(Element) * 8)
1770 rBit = bits(srcElem1, shiftAmt - 1);
1771 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1772 rBit = 1;
1773 if (shiftAmt >= sizeof(Element) * 8) {
1774 shiftAmt = sizeof(Element) * 8 - 1;
1775 destElem = 0;
1776 } else {
1777 destElem = (srcElem1 >> shiftAmt);
1778 }
1779 // Make sure the right shift sign extended when it should.
1780 if (ltz(srcElem1) && !ltz(destElem)) {
1781 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1782 1 - shiftAmt));
1783 }
1784 destElem += rBit;
1785 } else if (shiftAmt > 0) {
1786 if (shiftAmt >= sizeof(Element) * 8) {
1787 destElem = 0;
1788 } else {
1789 destElem = srcElem1 << shiftAmt;
1790 }
1791 } else {
1792 destElem = srcElem1;
1793 }
1794 '''
1795 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1796 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1797
# Unsigned saturating shift by a per-element register amount.  Negative
# amounts shift right (0 once the magnitude reaches the element width).
# Positive amounts shift left; if any set bit would be shifted out, the
# result becomes mask(element bits) and fpscr.qc is set.  A zero amount
# copies the source.
1798 vqshlUCode = '''
1799 int16_t shiftAmt = (int8_t)srcElem2;
1800 FPSCR fpscr = (FPSCR) FpscrQc;
1801 if (shiftAmt < 0) {
1802 shiftAmt = -shiftAmt;
1803 if (shiftAmt >= sizeof(Element) * 8) {
1804 shiftAmt = sizeof(Element) * 8 - 1;
1805 destElem = 0;
1806 } else {
1807 destElem = (srcElem1 >> shiftAmt);
1808 }
1809 } else if (shiftAmt > 0) {
1810 if (shiftAmt >= sizeof(Element) * 8) {
1811 if (srcElem1 != 0) {
1812 destElem = mask(sizeof(Element) * 8);
1813 fpscr.qc = 1;
1814 } else {
1815 destElem = 0;
1816 }
1817 } else {
1818 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1819 sizeof(Element) * 8 - shiftAmt)) {
1820 destElem = mask(sizeof(Element) * 8);
1821 fpscr.qc = 1;
1822 } else {
1823 destElem = srcElem1 << shiftAmt;
1824 }
1825 }
1826 } else {
1827 destElem = srcElem1;
1828 }
1829 FpscrQc = fpscr;
1830 '''
1831 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1832 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1833
# Signed saturating shift by a per-element register amount.  Negative
# amounts shift right with a manual sign fill.  Positive amounts shift left
# unless the top shiftAmt + 1 bits of the source are not all copies of the
# sign; in that case the result saturates to mask(bits - 1), complemented
# for a negative source, and fpscr.qc is set.  A zero amount copies the
# source.
1834 vqshlSCode = '''
1835 int16_t shiftAmt = (int8_t)srcElem2;
1836 FPSCR fpscr = (FPSCR) FpscrQc;
1837 if (shiftAmt < 0) {
1838 shiftAmt = -shiftAmt;
1839 if (shiftAmt >= sizeof(Element) * 8) {
1840 shiftAmt = sizeof(Element) * 8 - 1;
1841 destElem = 0;
1842 } else {
1843 destElem = (srcElem1 >> shiftAmt);
1844 }
1845 // Make sure the right shift sign extended when it should.
1846 if (srcElem1 < 0 && destElem >= 0) {
1847 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1848 1 - shiftAmt));
1849 }
1850 } else if (shiftAmt > 0) {
1851 bool sat = false;
1852 if (shiftAmt >= sizeof(Element) * 8) {
1853 if (srcElem1 != 0)
1854 sat = true;
1855 else
1856 destElem = 0;
1857 } else {
1858 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1859 sizeof(Element) * 8 - 1 - shiftAmt) !=
1860 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1861 sat = true;
1862 } else {
1863 destElem = srcElem1 << shiftAmt;
1864 }
1865 }
1866 if (sat) {
1867 fpscr.qc = 1;
1868 destElem = mask(sizeof(Element) * 8 - 1);
1869 if (srcElem1 < 0)
1870 destElem = ~destElem;
1871 }
1872 } else {
1873 destElem = srcElem1;
1874 }
1875 FpscrQc = fpscr;
1876 '''
1877 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1878 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1879
# Unsigned saturating rounding shift by a per-element register amount.
# Negative amounts shift right and add rBit, the last bit shifted out.
# Non-negative amounts shift left, saturating to mask(element bits) and
# setting fpscr.qc when set bits would be lost.
# NOTE(review): there is no separate branch for shiftAmt == 0, so a zero
# amount reaches bits(srcElem1, N - 1, N) with first < last; confirm that
# bits() is well defined for that range.
1880 vqrshlUCode = '''
1881 int16_t shiftAmt = (int8_t)srcElem2;
1882 FPSCR fpscr = (FPSCR) FpscrQc;
1883 if (shiftAmt < 0) {
1884 shiftAmt = -shiftAmt;
1885 Element rBit = 0;
1886 if (shiftAmt <= sizeof(Element) * 8)
1887 rBit = bits(srcElem1, shiftAmt - 1);
1888 if (shiftAmt >= sizeof(Element) * 8) {
1889 shiftAmt = sizeof(Element) * 8 - 1;
1890 destElem = 0;
1891 } else {
1892 destElem = (srcElem1 >> shiftAmt);
1893 }
1894 destElem += rBit;
1895 } else {
1896 if (shiftAmt >= sizeof(Element) * 8) {
1897 if (srcElem1 != 0) {
1898 destElem = mask(sizeof(Element) * 8);
1899 fpscr.qc = 1;
1900 } else {
1901 destElem = 0;
1902 }
1903 } else {
1904 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1905 sizeof(Element) * 8 - shiftAmt)) {
1906 destElem = mask(sizeof(Element) * 8);
1907 fpscr.qc = 1;
1908 } else {
1909 destElem = srcElem1 << shiftAmt;
1910 }
1911 }
1912 }
1913 FpscrQc = fpscr;
1914 '''
1915 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1916 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1917
# Signed saturating rounding shift by a per-element register amount.
# Negative amounts shift right with a manual sign fill and add rBit, the
# last bit shifted out (forced to 1 for a negative source when the magnitude
# exceeds the element width).  Positive amounts shift left, saturating to
# mask(bits - 1) (complemented for a negative source) and setting fpscr.qc
# when the top shiftAmt + 1 bits are not all copies of the sign.  A zero
# amount copies the source.
1918 vqrshlSCode = '''
1919 int16_t shiftAmt = (int8_t)srcElem2;
1920 FPSCR fpscr = (FPSCR) FpscrQc;
1921 if (shiftAmt < 0) {
1922 shiftAmt = -shiftAmt;
1923 Element rBit = 0;
1924 if (shiftAmt <= sizeof(Element) * 8)
1925 rBit = bits(srcElem1, shiftAmt - 1);
1926 if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1927 rBit = 1;
1928 if (shiftAmt >= sizeof(Element) * 8) {
1929 shiftAmt = sizeof(Element) * 8 - 1;
1930 destElem = 0;
1931 } else {
1932 destElem = (srcElem1 >> shiftAmt);
1933 }
1934 // Make sure the right shift sign extended when it should.
1935 if (srcElem1 < 0 && destElem >= 0) {
1936 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1937 1 - shiftAmt));
1938 }
1939 destElem += rBit;
1940 } else if (shiftAmt > 0) {
1941 bool sat = false;
1942 if (shiftAmt >= sizeof(Element) * 8) {
1943 if (srcElem1 != 0)
1944 sat = true;
1945 else
1946 destElem = 0;
1947 } else {
1948 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1949 sizeof(Element) * 8 - 1 - shiftAmt) !=
1950 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1951 sat = true;
1952 } else {
1953 destElem = srcElem1 << shiftAmt;
1954 }
1955 }
1956 if (sat) {
1957 fpscr.qc = 1;
1958 destElem = mask(sizeof(Element) * 8 - 1);
1959 if (srcElem1 < 0)
1960 destElem = ~destElem;
1961 }
1962 } else {
1963 destElem = srcElem1;
1964 }
1965 FpscrQc = fpscr;
1966 '''
1967 threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1968 threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1969
# Absolute difference and accumulate: |srcElem1 - srcElem2| (computed by
# subtracting the smaller from the larger) is added to destElem.  The
# trailing True is passed because the body reads destElem.
1970 vabaCode = '''
1971 destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1972 (srcElem2 - srcElem1);
1973 '''
1974 threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1975 threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# Long form: the operands are cast to BigElement before subtracting.
1976 vabalCode = '''
1977 destElem += (srcElem1 > srcElem2) ?
1978 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1979 ((BigElement)srcElem2 - (BigElement)srcElem1);
1980 '''
1981 threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1982
# Absolute difference: the smaller source is subtracted from the larger.
# Instantiated as VabdD and VabdQ over allTypes.
1983 vabdCode = '''
1984 destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1985 (srcElem2 - srcElem1);
1986 '''
1987 threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1988 threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# Long form: the operands are cast to BigElement before subtracting.
1989 vabdlCode = '''
1990 destElem = (srcElem1 > srcElem2) ?
1991 ((BigElement)srcElem1 - (BigElement)srcElem2) :
1992 ((BigElement)srcElem2 - (BigElement)srcElem1);
1993 '''
1994 threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1995
# Test bits: all ones when the sources share at least one set bit,
# otherwise 0.  Instantiated as VtstD and VtstQ over unsignedTypes.
1996 vtstCode = '''
1997 destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1998 '''
1999 threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2000 threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2001
# Element-wise multiply; instantiated as NVmulD and NVmulQ over allTypes.
2002 vmulCode = '''
2003 destElem = srcElem1 * srcElem2;
2004 '''
2005 threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2006 threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# Long multiply: both operands are cast to BigElement before the product.
2007 vmullCode = '''
2008 destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2009 '''
2010 threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2011
# Multiply-accumulate: the product is added to the existing destElem, so
# the trailing True is passed because the body reads destElem.
2012 vmlaCode = '''
2013 destElem = destElem + srcElem1 * srcElem2;
2014 '''
2015 threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2016 threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# Long form: the operands are cast to BigElement before multiplying.
2017 vmlalCode = '''
2018 destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2019 '''
2020 threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2021
# Saturating doubling multiply-accumulate (long).  midElem is twice the
# product of the sources; for the listed maxNeg/halfNeg operand pairs it is
# replaced by ~(maxNeg << element bits) and fpscr.qc is set.  midElem is
# then added to destElem; if both had the same sign and the sum's sign
# differs, destElem saturates to mask(BigElement bits - 1), complemented
# when the old destination was negative, and fpscr.qc is set.
# NOTE(review): the two halfNeg cases are treated as saturating as well;
# if BigElement is twice as wide as Element, 2 * halfNeg * maxNeg appears to
# be representable.  Confirm against the architecture manual.
2022 vqdmlalCode = '''
2023 FPSCR fpscr = (FPSCR) FpscrQc;
2024 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2025 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2026 Element halfNeg = maxNeg / 2;
2027 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2028 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2029 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2030 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2031 fpscr.qc = 1;
2032 }
2033 bool negPreDest = ltz(destElem);
2034 destElem += midElem;
2035 bool negDest = ltz(destElem);
2036 bool negMid = ltz(midElem);
2037 if (negPreDest == negMid && negMid != negDest) {
2038 destElem = mask(sizeof(BigElement) * 8 - 1);
2039 if (negPreDest)
2040 destElem = ~destElem;
2041 fpscr.qc = 1;
2042 }
2043 FpscrQc = fpscr;
2044 '''
2045 threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2046
# Saturating doubling multiply-subtract (long).  midElem is computed and
# saturated as in the accumulate form, then subtracted from destElem.
# posMid is ltz(-midElem); if the old destination's sign equals posMid and
# the new sign differs, destElem saturates to mask(BigElement bits - 1),
# complemented when the old destination was negative, and fpscr.qc is set.
# NOTE(review): the two halfNeg cases are treated as saturating as well;
# if BigElement is twice as wide as Element, 2 * halfNeg * maxNeg appears to
# be representable.  Confirm against the architecture manual.
2047 vqdmlslCode = '''
2048 FPSCR fpscr = (FPSCR) FpscrQc;
2049 BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2050 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2051 Element halfNeg = maxNeg / 2;
2052 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055 midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056 fpscr.qc = 1;
2057 }
2058 bool negPreDest = ltz(destElem);
2059 destElem -= midElem;
2060 bool negDest = ltz(destElem);
2061 bool posMid = ltz((BigElement)-midElem);
2062 if (negPreDest == posMid && posMid != negDest) {
2063 destElem = mask(sizeof(BigElement) * 8 - 1);
2064 if (negPreDest)
2065 destElem = ~destElem;
2066 fpscr.qc = 1;
2067 }
2068 FpscrQc = fpscr;
2069 '''
2070 threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2071
# Saturating doubling multiply (long): destElem is twice the product of the
# sources.  When both sources equal 1 << (element bits - 1), the result is
# replaced by ~(srcElem1 << element bits) and fpscr.qc is set.
2072 vqdmullCode = '''
2073 FPSCR fpscr = (FPSCR) FpscrQc;
2074 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2075 if (srcElem1 == srcElem2 &&
2076 srcElem1 == (Element)((Element)1 <<
2077 (Element)(sizeof(Element) * 8 - 1))) {
2078 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2079 fpscr.qc = 1;
2080 }
2081 FpscrQc = fpscr;
2082 '''
2083 threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2084
# Multiply-subtract: the product is subtracted from the existing destElem,
# so the trailing True is passed because the body reads destElem.
2085 vmlsCode = '''
2086 destElem = destElem - srcElem1 * srcElem2;
2087 '''
2088 threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2089 threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# Long form: the operands are cast to BigElement before multiplying.
2090 vmlslCode = '''
2091 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2092 '''
2093 threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2094
# Carry-less (XOR-accumulated) multiply: for every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into the result.  Instantiated as
# NVmulpD and NVmulpQ over unsignedTypes.
2095 vmulpCode = '''
2096 destElem = 0;
2097 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2098 if (bits(srcElem2, j))
2099 destElem ^= srcElem1 << j;
2100 }
2101 '''
2102 threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2103 threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form: srcElem1 is cast to BigElement before each shift so the shifted
# bits are kept.
2104 vmullpCode = '''
2105 destElem = 0;
2106 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2107 if (bits(srcElem2, j))
2108 destElem ^= (BigElement)srcElem1 << j;
2109 }
2110 '''
2111 threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2112
# Widening add: both operands are cast to BigElement before the sum.  The
# same body serves the long (Vaddl) and wide (Vaddw) generators.
1621 vaddlwCode = '''
1622 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1623 '''
1624 threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1625 threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# Add and keep the high half: the BigElement sum is shifted right by the
# bit width of Element.
1626 vaddhnCode = '''
1627 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628 (sizeof(Element) * 8);
1629 '''
1630 threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# Rounding variant: 1 << (Element bits - 1) is added before the shift.
1631 vraddhnCode = '''
1632 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1633 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1634 (sizeof(Element) * 8);
1635 '''
1636 threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# Plain element-wise subtract; instantiated as NVsubD and NVsubQ over
# unsignedTypes.
1638 vsubCode = '''
1639 destElem = srcElem1 - srcElem2;
1640 '''
1641 threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1642 threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# Widening subtract: both operands are cast to BigElement first.  Shared by
# the long (Vsubl) and wide (Vsubw) generators.
1643 vsublwCode = '''
1644 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1645 '''
1646 threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1647 threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1648
# Unsigned saturating add: if the sum is smaller than either source (i.e. it
# wrapped), the result becomes all ones and fpscr.qc is set.  FpscrQc is
# read into a local FPSCR and written back at the end.
1649 vqaddUCode = '''
1650 destElem = srcElem1 + srcElem2;
1651 FPSCR fpscr = (FPSCR) FpscrQc;
1652 if (destElem < srcElem1 || destElem < srcElem2) {
1653 destElem = (Element)(-1);
1654 fpscr.qc = 1;
1655 }
1656 FpscrQc = fpscr;
1657 '''
1658 threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1659 threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Subtract and keep the high half: the BigElement difference is shifted
# right by the bit width of Element.
1660 vsubhnCode = '''
1661 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1662 (sizeof(Element) * 8);
1663 '''
1664 threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# Rounding variant: 1 << (Element bits - 1) is added before the shift.
1665 vrsubhnCode = '''
1666 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1667 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1668 (sizeof(Element) * 8);
1669 '''
1670 threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# Signed saturating add.  Overflow is detected when both sources have the
# same sign and the sum's sign differs.  The result is then replaced by
# 1 << (bits - 1), minus one when the wrapped sum was negative, and
# fpscr.qc is set.
1672 vqaddSCode = '''
1673 destElem = srcElem1 + srcElem2;
1674 FPSCR fpscr = (FPSCR) FpscrQc;
1675 bool negDest = (destElem < 0);
1676 bool negSrc1 = (srcElem1 < 0);
1677 bool negSrc2 = (srcElem2 < 0);
1678 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1679 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1680 if (negDest)
1681 destElem -= 1;
1682 fpscr.qc = 1;
1683 }
1684 FpscrQc = fpscr;
1685 '''
1686 threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1687 threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1688
# Unsigned saturating subtract: if the difference is larger than srcElem1
# (i.e. it wrapped), the result becomes 0 and fpscr.qc is set.
1689 vqsubUCode = '''
1690 destElem = srcElem1 - srcElem2;
1691 FPSCR fpscr = (FPSCR) FpscrQc;
1692 if (destElem > srcElem1) {
1693 destElem = 0;
1694 fpscr.qc = 1;
1695 }
1696 FpscrQc = fpscr;
1697 '''
1698 threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1699 threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1700
# Signed saturating subtract.  Overflow is detected when the sources have
# opposite signs (negSrc1 == posSrc2) and the difference's sign differs from
# srcElem1's.  The result is then replaced by 1 << (bits - 1), minus one
# when the wrapped difference was negative, and fpscr.qc is set.
1701 vqsubSCode = '''
1702 destElem = srcElem1 - srcElem2;
1703 FPSCR fpscr = (FPSCR) FpscrQc;
1704 bool negDest = (destElem < 0);
1705 bool negSrc1 = (srcElem1 < 0);
1706 bool posSrc2 = (srcElem2 >= 0);
1707 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1708 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1709 if (negDest)
1710 destElem -= 1;
1711 fpscr.qc = 1;
1712 }
1713 FpscrQc = fpscr;
1714 '''
1715 threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1716 threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1717
# Compare greater-than: all ones when srcElem1 > srcElem2, otherwise 0.
# Instantiated as VcgtD and VcgtQ over allTypes.
1718 vcgtCode = '''
1719 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1720 '''
1721 threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1722 threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1723
# Compare greater-or-equal: all ones when srcElem1 >= srcElem2, otherwise 0.
# Instantiated as VcgeD and VcgeQ over allTypes.
1724 vcgeCode = '''
1725 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1726 '''
1727 threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1728 threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1729
# Compare equal: all ones when the sources match, otherwise 0.
# Instantiated as VceqD and VceqQ over unsignedTypes.
1730 vceqCode = '''
1731 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1732 '''
1733 threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1734 threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1735
# Shift by a per-element register amount.  The amount is the low byte of
# srcElem2 read as signed: negative values shift right by the magnitude,
# others shift left.  A magnitude of at least the element width yields 0
# before the sign fix-up.  After a right shift of a negative source
# (per ltz()) the upper bits are ORed in by hand so the result is
# sign-extended.
1736 vshlCode = '''
1737 int16_t shiftAmt = (int8_t)srcElem2;
1738 if (shiftAmt < 0) {
1739 shiftAmt = -shiftAmt;
1740 if (shiftAmt >= sizeof(Element) * 8) {
1741 shiftAmt = sizeof(Element) * 8 - 1;
1742 destElem = 0;
1743 } else {
1744 destElem = (srcElem1 >> shiftAmt);
1745 }
1746 // Make sure the right shift sign extended when it should.
1747 if (ltz(srcElem1) && !ltz(destElem)) {
1748 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1749 1 - shiftAmt));
1750 }
1751 } else {
1752 if (shiftAmt >= sizeof(Element) * 8) {
1753 destElem = 0;
1754 } else {
1755 destElem = srcElem1 << shiftAmt;
1756 }
1757 }
1758 '''
1759 threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1760 threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1761
# Rounding shift by a per-element register amount.  Same structure as the
# plain register shift, but on a right shift the last bit shifted out
# (bit shiftAmt - 1 of the source) is captured in rBit and added to the
# result.  For magnitudes above the element width rBit is forced to 1 when
# the source is negative.  A zero amount copies the source unchanged.
1762 vrshlCode = '''
1763 int16_t shiftAmt = (int8_t)srcElem2;
1764 if (shiftAmt < 0) {
1765 shiftAmt = -shiftAmt;
1766 Element rBit = 0;
1767 if (shiftAmt <= sizeof(Element) * 8)
1768 rBit = bits(srcElem1, shiftAmt - 1);
1769 if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1770 rBit = 1;
1771 if (shiftAmt >= sizeof(Element) * 8) {
1772 shiftAmt = sizeof(Element) * 8 - 1;
1773 destElem = 0;
1774 } else {
1775 destElem = (srcElem1 >> shiftAmt);
1776 }
1777 // Make sure the right shift sign extended when it should.
1778 if (ltz(srcElem1) && !ltz(destElem)) {
1779 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1780 1 - shiftAmt));
1781 }
1782 destElem += rBit;
1783 } else if (shiftAmt > 0) {
1784 if (shiftAmt >= sizeof(Element) * 8) {
1785 destElem = 0;
1786 } else {
1787 destElem = srcElem1 << shiftAmt;
1788 }
1789 } else {
1790 destElem = srcElem1;
1791 }
1792 '''
1793 threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1794 threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1795
# Unsigned saturating shift by a per-element register amount.  Negative
# amounts shift right (0 once the magnitude reaches the element width).
# Positive amounts shift left; if any set bit would be shifted out, the
# result becomes mask(element bits) and fpscr.qc is set.  A zero amount
# copies the source.
1796 vqshlUCode = '''
1797 int16_t shiftAmt = (int8_t)srcElem2;
1798 FPSCR fpscr = (FPSCR) FpscrQc;
1799 if (shiftAmt < 0) {
1800 shiftAmt = -shiftAmt;
1801 if (shiftAmt >= sizeof(Element) * 8) {
1802 shiftAmt = sizeof(Element) * 8 - 1;
1803 destElem = 0;
1804 } else {
1805 destElem = (srcElem1 >> shiftAmt);
1806 }
1807 } else if (shiftAmt > 0) {
1808 if (shiftAmt >= sizeof(Element) * 8) {
1809 if (srcElem1 != 0) {
1810 destElem = mask(sizeof(Element) * 8);
1811 fpscr.qc = 1;
1812 } else {
1813 destElem = 0;
1814 }
1815 } else {
1816 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1817 sizeof(Element) * 8 - shiftAmt)) {
1818 destElem = mask(sizeof(Element) * 8);
1819 fpscr.qc = 1;
1820 } else {
1821 destElem = srcElem1 << shiftAmt;
1822 }
1823 }
1824 } else {
1825 destElem = srcElem1;
1826 }
1827 FpscrQc = fpscr;
1828 '''
1829 threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1830 threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1831
# Signed saturating shift by a per-element register amount.  Negative
# amounts shift right with a manual sign fill.  Positive amounts shift left
# unless the top shiftAmt + 1 bits of the source are not all copies of the
# sign; in that case the result saturates to mask(bits - 1), complemented
# for a negative source, and fpscr.qc is set.  A zero amount copies the
# source.
1832 vqshlSCode = '''
1833 int16_t shiftAmt = (int8_t)srcElem2;
1834 FPSCR fpscr = (FPSCR) FpscrQc;
1835 if (shiftAmt < 0) {
1836 shiftAmt = -shiftAmt;
1837 if (shiftAmt >= sizeof(Element) * 8) {
1838 shiftAmt = sizeof(Element) * 8 - 1;
1839 destElem = 0;
1840 } else {
1841 destElem = (srcElem1 >> shiftAmt);
1842 }
1843 // Make sure the right shift sign extended when it should.
1844 if (srcElem1 < 0 && destElem >= 0) {
1845 destElem |= -((Element)1 << (sizeof(Element) * 8 -
1846 1 - shiftAmt));
1847 }
1848 } else if (shiftAmt > 0) {
1849 bool sat = false;
1850 if (shiftAmt >= sizeof(Element) * 8) {
1851 if (srcElem1 != 0)
1852 sat = true;
1853 else
1854 destElem = 0;
1855 } else {
1856 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1857 sizeof(Element) * 8 - 1 - shiftAmt) !=
1858 ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1859 sat = true;
1860 } else {
1861 destElem = srcElem1 << shiftAmt;
1862 }
1863 }
1864 if (sat) {
1865 fpscr.qc = 1;
1866 destElem = mask(sizeof(Element) * 8 - 1);
1867 if (srcElem1 < 0)
1868 destElem = ~destElem;
1869 }
1870 } else {
1871 destElem = srcElem1;
1872 }
1873 FpscrQc = fpscr;
1874 '''
1875 threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1876 threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1877
# Unsigned saturating rounding shift by a per-element register amount.
# Negative amounts shift right and add rBit, the last bit shifted out.
# Non-negative amounts shift left, saturating to mask(element bits) and
# setting fpscr.qc when set bits would be lost.
# NOTE(review): there is no separate branch for shiftAmt == 0, so a zero
# amount reaches bits(srcElem1, N - 1, N) with first < last; confirm that
# bits() is well defined for that range.
1878 vqrshlUCode = '''
1879 int16_t shiftAmt = (int8_t)srcElem2;
1880 FPSCR fpscr = (FPSCR) FpscrQc;
1881 if (shiftAmt < 0) {
1882 shiftAmt = -shiftAmt;
1883 Element rBit = 0;
1884 if (shiftAmt <= sizeof(Element) * 8)
1885 rBit = bits(srcElem1, shiftAmt - 1);
1886 if (shiftAmt >= sizeof(Element) * 8) {
1887 shiftAmt = sizeof(Element) * 8 - 1;
1888 destElem = 0;
1889 } else {
1890 destElem = (srcElem1 >> shiftAmt);
1891 }
1892 destElem += rBit;
1893 } else {
1894 if (shiftAmt >= sizeof(Element) * 8) {
1895 if (srcElem1 != 0) {
1896 destElem = mask(sizeof(Element) * 8);
1897 fpscr.qc = 1;
1898 } else {
1899 destElem = 0;
1900 }
1901 } else {
1902 if (bits(srcElem1, sizeof(Element) * 8 - 1,
1903 sizeof(Element) * 8 - shiftAmt)) {
1904 destElem = mask(sizeof(Element) * 8);
1905 fpscr.qc = 1;
1906 } else {
1907 destElem = srcElem1 << shiftAmt;
1908 }
1909 }
1910 }
1911 FpscrQc = fpscr;
1912 '''
1913 threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1914 threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# Signed rounding shift by a per-element register count, with saturation.
# Same structure as the non-rounding signed variant, except that a right
# shift adds back a rounding bit: the last bit shifted out, or 1 when a
# negative value is shifted by more than the element width.
vqrshlSCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt == 0) {
        destElem = srcElem1;
    } else if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8) {
            rBit = bits(srcElem1, shiftAmt - 1);
        } else if (srcElem1 < 0) {
            rBit = 1;
        }
        if (shiftAmt >= sizeof(Element) * 8) {
            // Clamp the count so the sign fill below covers the
            // whole element.
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else {
        bool sat;
        if (shiftAmt >= sizeof(Element) * 8) {
            sat = (srcElem1 != 0);
            if (!sat)
                destElem = 0;
        } else {
            sat = (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                   ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0));
            if (!sat)
                destElem = srcElem1 << shiftAmt;
        }
        if (sat) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqrshl", "VqrshlS" + dqSuffix, "SimdCmpOp",
                      signedTypes, dqRegs, vqrshlSCode)
1967
# Absolute difference of the two sources, accumulated into the destination.
vabaCode = '''
    if (srcElem1 > srcElem2)
        destElem += srcElem1 - srcElem2;
    else
        destElem += srcElem2 - srcElem1;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vaba", "Vaba" + dqSuffix, "SimdAddAccOp",
                      allTypes, dqRegs, vabaCode, True)
# Long form: the subtraction is carried out at BigElement width.
vabalCode = '''
    if (srcElem1 > srcElem2)
        destElem += (BigElement)srcElem1 - (BigElement)srcElem2;
    else
        destElem += (BigElement)srcElem2 - (BigElement)srcElem1;
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1980
# Absolute difference of the two sources, written to the destination.
vabdCode = '''
    if (srcElem1 > srcElem2)
        destElem = srcElem1 - srcElem2;
    else
        destElem = srcElem2 - srcElem1;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vabd", "Vabd" + dqSuffix, "SimdAddOp",
                      allTypes, dqRegs, vabdCode)
# Long form: the subtraction is carried out at BigElement width.
vabdlCode = '''
    if (srcElem1 > srcElem2)
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    else
        destElem = (BigElement)srcElem2 - (BigElement)srcElem1;
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1993
# Bit test: all ones when the sources share at least one set bit, else zero.
vtstCode = '''
    if (srcElem1 & srcElem2)
        destElem = (Element)(-1);
    else
        destElem = 0;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vtst", "Vtst" + dqSuffix, "SimdAluOp",
                      unsignedTypes, dqRegs, vtstCode)
1999
# Element-wise multiply at element width.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmul", "NVmul" + dqSuffix, "SimdMultOp",
                      allTypes, dqRegs, vmulCode)
# Long multiply: both operands are widened to BigElement first.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# Multiply and accumulate into the destination element.
vmlaCode = '''
    destElem += srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmla", "NVmla" + dqSuffix, "SimdMultAccOp",
                      allTypes, dqRegs, vmlaCode, True)
# Long form: the product is formed at BigElement width.
vmlalCode = '''
    destElem += (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2019
# Saturating doubling multiply, accumulated (long) into the destination.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to BigElement.  The
# doubled product exceeds the signed BigElement range only when both
# operands are the most negative Element value (result 2^(2n-1) for n-bit
# elements); every other operand pair, including (maxNeg / 2) * maxNeg,
# which gives 2^(2n-2), is representable and must not be clamped.  The
# saturation test is made before multiplying so the overflowing product is
# never evaluated.
# Step 2: the accumulation saturates when both addends have the same sign
# and the sum comes out with the opposite sign.
# Either saturation raises the cumulative saturation flag.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Largest positive BigElement value.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2044
# Saturating doubling multiply, subtracted (long) from the destination.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to BigElement.  The
# doubled product exceeds the signed BigElement range only when both
# operands are the most negative Element value (result 2^(2n-1) for n-bit
# elements); every other operand pair, including (maxNeg / 2) * maxNeg,
# which gives 2^(2n-2), is representable and must not be clamped.  The
# saturation test is made before multiplying so the overflowing product is
# never evaluated.
# Step 2: the subtraction saturates when the destination and the negated
# product have the same sign and the difference comes out with the
# opposite sign.
# Either saturation raises the cumulative saturation flag.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    BigElement midElem;
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        // Largest positive BigElement value.
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    } else {
        midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2069
# Saturating doubling long multiply.  When both operands equal the most
# negative Element value the result is replaced by the largest positive
# BigElement value and the cumulative saturation flag is raised.
# NOTE(review): this registration uses op class "SimdMultAccOp" although
# nothing is accumulated; confirm whether "SimdMultOp" was intended.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = (Element)((Element)1 <<
                               (Element)(sizeof(Element) * 8 - 1));
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp",
                 smallTypes, vqdmullCode)
2082
# Multiply and subtract the product from the destination element.
vmlsCode = '''
    destElem -= srcElem1 * srcElem2;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmls", "NVmls" + dqSuffix, "SimdMultAccOp",
                      allTypes, dqRegs, vmlsCode, True)
# Long form: the product is formed at BigElement width.
vmlslCode = '''
    destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2092
# Carry-less multiply: XOR together one shifted copy of srcElem1 for every
# set bit of srcElem2.
vmulpCode = '''
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < sizeof(Element) * 8; ++bitPos) {
        if (bits(srcElem2, bitPos))
            destElem ^= srcElem1 << bitPos;
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmul", "NVmulp" + dqSuffix, "SimdMultOp",
                      unsignedTypes, dqRegs, vmulpCode)
# Long form: each shifted copy is widened to BigElement before the XOR.
vmullpCode = '''
    destElem = 0;
    for (unsigned bitPos = 0; bitPos < sizeof(Element) * 8; ++bitPos) {
        if (bits(srcElem2, bitPos))
            destElem ^= (BigElement)srcElem1 << bitPos;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
# Integer pairwise maximum / minimum.  Each class is registered exactly
# once, as the D form over smallTypes, reusing the element-wise max/min
# snippets with pairwise=True.  (Registering VpmaxD / VpminD a second time
# with a different type list would define the same class twice.)
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2118
# Saturating doubling multiply returning the high half of the product.
# When both operands equal the most negative Element value the result is
# replaced by the bitwise complement of that value and the cumulative
# saturation flag is raised.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = (Element)((Element)1 <<
                               (sizeof(Element) * 8 - 1));
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqdmulh", "Vqdmulh" + dqSuffix, "SimdMultOp",
                      smallSignedTypes, dqRegs, vqdmulhCode)
2133
# Saturating rounding doubling multiply returning the high half:
#   (2 * srcElem1 * srcElem2 + 2^(n-1)) >> n     for n-bit elements.
# The rounded result exceeds the signed Element range only when both
# operands are the most negative Element value (it would be +2^(n-1)); in
# that case the result is clamped to the largest positive value and the
# cumulative saturation flag is raised.  No other operand pair saturates:
# (maxNeg / 2) * maxNeg, for instance, yields 2^(n-2), which fits.
# The saturation test is made before multiplying so the overflowing
# product is never evaluated.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
        destElem = mask(sizeof(Element) * 8 - 1);
        fpscr.qc = 1;
    } else {
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2157
# Floating point maximum.  processNans() gets the first chance to produce
# the result; when it reports that it did not, the operation goes through
# binaryOp() with fpMaxS.  When it did and flushToZero() reports true for
# the sources, the idc status bit is set.
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (done) {
        if (flushToZero(srcReg1, srcReg2))
            fpscr.idc = 1;
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmax", "Vmax" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vmaxfpCode)
2172
# Floating point minimum.  processNans() gets the first chance to produce
# the result; when it reports that it did not, the operation goes through
# binaryOp() with fpMinS.  When it did and flushToZero() reports true for
# the sources, the idc status bit is set.
vminfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (done) {
        if (flushToZero(srcReg1, srcReg2))
            fpscr.idc = 1;
    } else {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    }
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmin", "Vmin" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vminfpCode)
2187
# Floating point pairwise maximum / minimum: the element-wise max/min
# snippets are reused with pairwise=True.
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vpmax", "Vpmax" + dqSuffix + "Fp",
                        "SimdFloatCmpOp", ("float",),
                        dqRegs, vmaxfpCode, pairwise=True)

for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vpmin", "Vpmin" + dqSuffix + "Fp",
                        "SimdFloatCmpOp", ("float",),
                        dqRegs, vminfpCode, pairwise=True)
2197
# Floating point add, performed through binaryOp() with fpAddS.
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vadd", "Vadd" + dqSuffix + "Fp", "SimdFloatAddOp",
                        ("float",), dqRegs, vaddfpCode)

# Pairwise add reuses the same snippet with pairwise=True.
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vpadd", "Vpadd" + dqSuffix + "Fp", "SimdFloatAddOp",
                        ("float",), dqRegs, vaddfpCode, pairwise=True)
2211
# Floating point subtract, performed through binaryOp() with fpSubS.
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vsub", "Vsub" + dqSuffix + "Fp", "SimdFloatAddOp",
                        ("float",), dqRegs, vsubfpCode)
2220
# Floating point multiply, performed through binaryOp() with fpMulS.
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmul", "NVmul" + dqSuffix + "Fp", "SimdFloatMultOp",
                        ("float",), dqRegs, vmulfpCode)
2229
# Floating point multiply-accumulate as two separate binaryOp() steps:
# first the product of the sources, then product + destination.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, product, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmla", "NVmla" + dqSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",),
                        dqRegs, vmlafpCode, True)
2240
# Floating point multiply-subtract as two separate binaryOp() steps:
# first the product of the sources, then destination - product.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float product = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, product, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vmls", "NVmls" + dqSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",),
                        dqRegs, vmlsfpCode, True)
2251
# Floating point compare using vcgtFunc.  The destination is all ones when
# the helper yields 0 and zero otherwise; a helper result of 2.0 also sets
# the ioc status bit.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    if (res == 0)
        destReg = -1;
    else
        destReg = 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vcgt", "Vcgt" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vcgtfpCode, toInt = True)
2265
# Floating point compare using vcgeFunc.  The destination is all ones when
# the helper yields 0 and zero otherwise; a helper result of 2.0 also sets
# the ioc status bit.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    if (res == 0)
        destReg = -1;
    else
        destReg = 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vcge", "Vcge" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vcgefpCode, toInt = True)
2279
# Floating point compare using vacgtFunc.  The destination is all ones when
# the helper yields 0 and zero otherwise; a helper result of 2.0 also sets
# the ioc status bit.
vacgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    if (res == 0)
        destReg = -1;
    else
        destReg = 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vacgt", "Vacgt" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vacgtfpCode, toInt = True)
2293
# Floating point compare using vacgeFunc.  The destination is all ones when
# the helper yields 0 and zero otherwise; a helper result of 2.0 also sets
# the ioc status bit.
vacgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    if (res == 0)
        destReg = -1;
    else
        destReg = 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vacge", "Vacge" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vacgefpCode, toInt = True)
2307
# Floating point compare using vceqFunc.  The destination is all ones when
# the helper yields 0 and zero otherwise; a helper result of 2.0 also sets
# the ioc status bit.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    if (res == 0)
        destReg = -1;
    else
        destReg = 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vceq", "Vceq" + dqSuffix + "Fp", "SimdFloatCmpOp",
                        ("float",), dqRegs, vceqfpCode, toInt = True)
2321
# Two-source floating point operation performed through binaryOp() with
# the fpRecpsS helper.
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vrecps", "Vrecps" + dqSuffix + "Fp",
                        "SimdFloatMultAccOp", ("float",),
                        dqRegs, vrecpsCode)
2330
# Two-source floating point operation performed through binaryOp() with
# the fpRSqrtsS helper.
vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vrsqrts", "Vrsqrts" + dqSuffix + "Fp",
                        "SimdFloatMiscOp", ("float",),
                        dqRegs, vrsqrtsCode)
2339
# Floating point absolute difference: subtract through binaryOp() with
# fpSubS, then take fabs() of the difference.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float difference = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                                true, true, VfpRoundNearest);
    destReg = fabs(difference);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    threeEqualRegInstFp("vabd", "Vabd" + dqSuffix + "Fp", "SimdFloatAddOp",
                        ("float",), dqRegs, vabdfpCode)
2349
# Two-register ("s"-suffixed) variants of multiply-accumulate,
# multiply-subtract and multiply.  They reuse the three-register snippets;
# within each family the integer D/Q forms come first, then the floating
# point D/Q forms, then the long form.
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmla", "Vmlas" + dqSuffix, "SimdMultAccOp",
                    unsignedTypes, dqRegs, vmlaCode, True)
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmla", "Vmlas" + dqSuffix + "Fp",
                      "SimdFloatMultAccOp", ("float",),
                      dqRegs, vmlafpCode, True)
twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)

for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmls", "Vmlss" + dqSuffix, "SimdMultAccOp",
                    allTypes, dqRegs, vmlsCode, True)
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmls", "Vmlss" + dqSuffix + "Fp",
                      "SimdFloatMultAccOp", ("float",),
                      dqRegs, vmlsfpCode, True)
twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)

for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vmul", "Vmuls" + dqSuffix, "SimdMultOp",
                    allTypes, dqRegs, vmulCode)
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInstFp("vmul", "Vmuls" + dqSuffix + "Fp",
                      "SimdFloatMultOp", ("float",),
                      dqRegs, vmulfpCode)
twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2367
# Two-register ("s"-suffixed) variants of the saturating doubling
# multiplies, reusing the three-register snippets.
twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp",
               smallTypes, vqdmullCode)
twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp",
               smallTypes, vqdmlalCode, True)
twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp",
               smallTypes, vqdmlslCode, True)
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vqdmulh", "Vqdmulhs" + dqSuffix, "SimdMultOp",
                    smallSignedTypes, dqRegs, vqdmulhCode)
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoEqualRegInst("vqrdmulh", "Vqrdmulhs" + dqSuffix, "SimdMultOp",
                    smallSignedTypes, dqRegs, vqrdmulhCode)
2377
# Shift right by immediate.  A shift of the full element width or more
# yields -1 for a negative source and 0 otherwise.
vshrCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = ltz(srcElem1) ? -1 : 0;
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vshr", "NVshr" + dqSuffix, "SimdShiftOp",
                    allTypes, dqRegs, vshrCode)
2390
# Shift right by immediate and accumulate into the destination.
vsraCode = '''
    Element mid;
    if (imm < sizeof(srcElem1) * 8) {
        mid = srcElem1 >> imm;
        // Refill the sign bits if the shift left them clear for a
        // negative source.
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    } else {
        if (ltz(srcElem1))
            mid = -1;
        else
            mid = 0;
    }
    destElem += mid;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vsra", "NVsra" + dqSuffix, "SimdShiftAccOp",
                    allTypes, dqRegs, vsraCode, True)
2406
# Rounding shift right by immediate: the last bit shifted out is added
# back to the shifted value.
vrshrCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element rBit = bits(srcElem1, imm - 1);
        // Shift in two steps so the total may equal the element width.
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vrshr", "NVrshr" + dqSuffix, "SimdShiftOp",
                    allTypes, dqRegs, vrshrCode)
2419
# Rounding shift right by immediate, accumulated into the destination.
# A shift larger than the element width contributes nothing.
vrsraCode = '''
    if (imm == 0) {
        destElem += srcElem1;
    } else if (imm <= sizeof(srcElem1) * 8) {
        Element rBit = bits(srcElem1, imm - 1);
        // Shift in two steps so the total may equal the element width.
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vrsra", "NVrsra" + dqSuffix, "SimdShiftAccOp",
                    allTypes, dqRegs, vrsraCode, True)
2432
# Shift right by immediate and insert: the shifted source replaces the low
# bits of the destination while the top imm destination bits are kept.
# A shift of the full element width or more leaves the destination as is.
vsriCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 >> imm) |
                   (destElem & ~mask(sizeof(Element) * 8 - imm));
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vsri", "NVsri" + dqSuffix, "SimdShiftOp",
                    unsignedTypes, dqRegs, vsriCode, True)
2442
# Shift left by immediate.  A shift of the full element width or more is
# performed in two steps (width - 1, then 1).
vshlCode = '''
    if (imm < sizeof(Element) * 8)
        destElem = srcElem1 << imm;
    else
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vshl", "NVshl" + dqSuffix, "SimdShiftOp",
                    unsignedTypes, dqRegs, vshlCode)
2451
# Shift left by immediate and insert: the shifted source replaces the high
# bits of the destination while the low imm destination bits are kept.
# A shift of the full element width or more leaves the destination as is.
vsliCode = '''
    if (imm < sizeof(Element) * 8) {
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
    }
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vsli", "NVsli" + dqSuffix, "SimdShiftOp",
                    unsignedTypes, dqRegs, vsliCode, True)
2460
# Signed saturating shift left by immediate.  If the bits shifted out
# (together with the new sign bit) are not a pure sign extension, the
# result is clamped to the largest value for a positive source and to the
# smallest otherwise, and the cumulative saturation flag is raised.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool sat = false;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0)
            sat = true;
        else
            destElem = 0;
    } else {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1))
            sat = true;
        else
            destElem = (srcElem1 << imm);
    }
    if (sat) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (srcElem1 > 0)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vqshl", "NVqshl" + dqSuffix, "SimdShiftOp",
                    signedTypes, dqRegs, vqshlCode)
2490
# Unsigned saturating shift left by immediate.  If any set bit would be
# shifted out, the result is clamped to all ones and the cumulative
# saturation flag is raised.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    bool sat = false;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0)
            sat = true;
        else
            destElem = 0;
    } else {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0)
            sat = true;
        else
            destElem = (srcElem1 << imm);
    }
    if (sat) {
        destElem = mask(sizeof(Element) * 8);
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vqshlu", "NVqshlu" + dqSuffix, "SimdShiftOp",
                    unsignedTypes, dqRegs, vqshluCode)
2516
# Saturating shift left by immediate of a signed source to an unsigned
# range.  A negative source always clamps to zero; a non-negative source
# clamps to all ones when set bits would be shifted out.  Any clamping
# raises the cumulative saturation flag.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 < 0) {
        destElem = 0;
        fpscr.qc = 1;
    } else if (imm >= sizeof(Element) * 8) {
        if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = (srcElem1 << imm);
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vqshlus", "NVqshlus" + dqSuffix, "SimdShiftOp",
                    signedTypes, dqRegs, vqshlusCode)
2553
# Shift right by immediate and narrow.  A shift of the full source width
# or more yields zero.
vshrnCode = '''
    if (imm < sizeof(srcElem1) * 8) {
        destElem = srcElem1 >> imm;
    } else {
        destElem = 0;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp",
                      smallUnsignedTypes, vshrnCode)
2562
# Rounding shift right by immediate and narrow: the last bit shifted out
# is added back to the shifted value.
vrshrnCode = '''
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        Element rBit = bits(srcElem1, imm - 1);
        // Shift in two steps so the total may equal the source width.
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp",
                      smallUnsignedTypes, vrshrnCode)
2574
# Signed saturating shift right by immediate and narrow.  If the shifted
# value does not survive a round trip through Element it is clamped to the
# largest/smallest signed Element value (by the sign of the source) and
# the cumulative saturation flag is raised.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1))
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        // Refill the sign bits above the shifted value.
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid == (Element)mid) {
            destElem = mid;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp",
                      smallSignedTypes, vqshrnCode)
2599
# Unsigned saturating shift right by immediate and narrow.  If the shifted
# value does not survive a round trip through Element it is clamped to all
# ones and the cumulative saturation flag is raised.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid == (Element)mid) {
            destElem = mid;
        } else {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2621
# Saturating shift right by immediate and narrow, signed source variant
# registered under the same mnemonic as the unsigned one.  If any bit
# above the Element width remains set after the shift, the result is
# clamped to zero for a negative source and to all ones otherwise, and the
# cumulative saturation flag is raised.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        destElem = srcElem1;
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) == 0) {
            destElem = mid;
        } else {
            if (srcElem1 < 0)
                destElem = 0;
            else
                destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns",
                      "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2648
# Signed saturating rounding shift right by immediate and narrow.  The
# last bit shifted out is added back; a value that does not survive a
# round trip through Element is clamped to the largest/smallest signed
# Element value (by the sign of the source) and the cumulative saturation
# flag is raised.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        if (srcElem1 == (Element)srcElem1) {
            destElem = srcElem1;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else if (imm > sizeof(srcElem1) * 8) {
        if (!(srcElem1 == 0 || srcElem1 == -1))
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        // Refill the sign bits above the shifted value.
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid == (Element)mid) {
            destElem = mid;
        } else {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
                      "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2684
# Unsigned saturating rounding shift right by immediate and narrow.  The
# last bit shifted out is added back; a value that does not survive a
# round trip through Element is clamped to the unsigned maximum (all
# Element bits set) and the cumulative saturation flag is raised.  The
# zero-shift path clamps to the same all-ones value as the shifting path;
# it must not use the signed maximum, which leaves the top bit clear.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
        "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2714
# Saturating rounding shift right by immediate and narrow, signed source
# variant registered under the same mnemonic as the unsigned one.  The
# last bit shifted out is added back; if any bit above the Element width
# is then set, the result is clamped to zero for a negative source and to
# all ones otherwise, and the cumulative saturation flag is raised.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm == 0) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    } else if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        // Refill the sign bits above the shifted value.
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) == 0) {
            destElem = mid;
        } else {
            if (srcElem1 < 0)
                destElem = 0;
            else
                destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
                      "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2751
# Shift left long by immediate: the source is widened to BigElement before
# shifting.  A shift of the full destination width or more yields zero.
vshllCode = '''
    if (imm < sizeof(destElem) * 8) {
        destElem = (BigElement)srcElem1 << imm;
    } else {
        destElem = 0;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp",
                    smallTypes, vshllCode)
2760
# Move long: a plain copy of the source element into the destination
# element, registered through the long shift generator.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp",
                    smallTypes, vmovlCode)
2765
# Float to fixed point conversion through vfpFpSToFixed() with its second
# argument false (NOTE(review): presumably the unsigned flavour - confirm
# against the helper).  idc is set when flushToZero() reports true for the
# source.  The empty asm statements name the source and result as memory
# operands; presumably they keep the compiler from moving the conversion
# across prepFpState()/finishVfp() - confirm.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, false, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vcvt", "NVcvt2ufx" + dqSuffix, "SimdCvtOp",
                    ("float",), dqRegs, vcvt2ufxCode, toInt = True)
2781
# Float to fixed point conversion through vfpFpSToFixed() with its second
# argument true (NOTE(review): presumably the signed flavour - confirm
# against the helper).  idc is set when flushToZero() reports true for the
# source.  The empty asm statements name the source and result as memory
# operands; presumably they keep the compiler from moving the conversion
# across prepFpState()/finishVfp() - confirm.
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1)) {
        fpscr.idc = 1;
    }
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, true, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vcvt", "NVcvt2sfx" + dqSuffix, "SimdCvtOp",
                    ("float",), dqRegs, vcvt2sfxCode, toInt = True)
2797
# Fixed point to float conversion through vfpUFixedToFpS().  The empty asm
# statements name the source and result as memory operands; presumably
# they keep the compiler from moving the conversion across
# prepFpState()/finishVfp() - confirm.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vcvt", "NVcvtu2fp" + dqSuffix, "SimdCvtOp",
                    ("float",), dqRegs, vcvtu2fpCode, fromInt = True)
2811
# Fixed point to float conversion through vfpSFixedToFpS().  The empty asm
# statements name the source and result as memory operands; presumably
# they keep the compiler from moving the conversion across
# prepFpState()/finishVfp() - confirm.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
for dqSuffix, dqRegs in (("D", 2), ("Q", 4)):
    twoRegShiftInst("vcvt", "NVcvts2fp" + dqSuffix, "SimdCvtOp",
                    ("float",), dqRegs, vcvts2fpCode, fromInt = True)
2825
# vcvt, single precision -> half precision (class NVcvts2h, narrowing).
# The source bits are reinterpreted as a float with bitsToFp(); fpscr.idc
# is set when flushToZero(srcFp1) returns true; the conversion itself is
# vcvtFpSFpH(), which is passed fpscr.ahp.
2826 vcvts2hCode = '''
2827 FPSCR fpscr = (FPSCR) FpscrExc;
2828 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2829 if (flushToZero(srcFp1))
2830 fpscr.idc = 1;
2831 VfpSavedState state = prepFpState(VfpRoundNearest);
2832 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2833 : "m" (srcFp1), "m" (destElem));
2834 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2835 fpscr.ahp, srcFp1);
2836 __asm__ __volatile__("" :: "m" (destElem));
2837 finishVfp(fpscr, state, true);
2838 FpscrExc = fpscr;
2839 '''
2840 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2841
# vcvt, half precision -> single precision (class NVcvth2s, widening).
# vcvtFpHFpS() produces a float that is stored as raw bits via fpToBits().
2842 vcvth2sCode = '''
2843 FPSCR fpscr = (FPSCR) FpscrExc;
2844 VfpSavedState state = prepFpState(VfpRoundNearest);
2845 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2846 : "m" (srcElem1), "m" (destElem));
2847 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2848 __asm__ __volatile__("" :: "m" (destElem));
2849 finishVfp(fpscr, state, true);
2850 FpscrExc = fpscr;
2851 '''
2852 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2853
# vrsqrte, integer form: each element is passed through
# unsignedRSqrtEstimate(). Registered for uint32_t only.
2854 vrsqrteCode = '''
2855 destElem = unsignedRSqrtEstimate(srcElem1);
2856 '''
2857 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2858 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2859
# vrsqrte, floating-point form: sets fpscr.idc when flushToZero(srcReg1)
# returns true, then computes fprSqrtEstimate(fpscr, srcReg1) and writes
# the FPSCR copy back to FpscrExc.
2860 vrsqrtefpCode = '''
2861 FPSCR fpscr = (FPSCR) FpscrExc;
2862 if (flushToZero(srcReg1))
2863 fpscr.idc = 1;
2864 destReg = fprSqrtEstimate(fpscr, srcReg1);
2865 FpscrExc = fpscr;
2866 '''
2867 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2868 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2869
# vrecpe, integer form: each element is passed through
# unsignedRecipEstimate(). Registered for uint32_t only.
2870 vrecpeCode = '''
2871 destElem = unsignedRecipEstimate(srcElem1);
2872 '''
2873 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2874 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2875
# vrecpe, floating-point form: sets fpscr.idc when flushToZero(srcReg1)
# returns true, then computes fpRecipEstimate(fpscr, srcReg1) and writes
# the FPSCR copy back to FpscrExc.
2876 vrecpefpCode = '''
2877 FPSCR fpscr = (FPSCR) FpscrExc;
2878 if (flushToZero(srcReg1))
2879 fpscr.idc = 1;
2880 destReg = fpRecipEstimate(fpscr, srcReg1);
2881 FpscrExc = fpscr;
2882 '''
2883 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2884 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2885
# vrev16 / vrev32 / vrev64: element reversal within 2-, 4- and 8-byte
# groups. Each snippet copies srcElem1 to destElem and sets
# j = i ^ (groupSize - 1), where groupSize is the group size in bytes
# ((1 << 1), (1 << 2), (1 << 3)) divided by sizeof(Element).
# NOTE(review): i and j are declared outside these snippets; presumably j
# is the destination element index used by the twoRegMiscInst template --
# confirm there.
2886 vrev16Code = '''
2887 destElem = srcElem1;
2888 unsigned groupSize = ((1 << 1) / sizeof(Element));
2889 unsigned reverseMask = (groupSize - 1);
2890 j = i ^ reverseMask;
2891 '''
2892 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2893 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# 4-byte groups: registered for uint8_t and uint16_t elements.
2894 vrev32Code = '''
2895 destElem = srcElem1;
2896 unsigned groupSize = ((1 << 2) / sizeof(Element));
2897 unsigned reverseMask = (groupSize - 1);
2898 j = i ^ reverseMask;
2899 '''
2900 twoRegMiscInst("vrev32", "NVrev32D",
2901 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2902 twoRegMiscInst("vrev32", "NVrev32Q",
2903 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# 8-byte groups: registered for smallUnsignedTypes (defined elsewhere).
2904 vrev64Code = '''
2905 destElem = srcElem1;
2906 unsigned groupSize = ((1 << 3) / sizeof(Element));
2907 unsigned reverseMask = (groupSize - 1);
2908 j = i ^ reverseMask;
2909 '''
2910 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2911 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2912
# vpaddl: the two source elements are each cast to BigElement before being
# added, and the sum is stored in destElem.
2913 vpaddlCode = '''
2914 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2915 '''
2916 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2917 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2918
# vpadal: same widened pair sum as vpaddl, but accumulated into destElem
# (+=). The trailing True argument is passed only for this accumulating
# form. NOTE(review): presumably it tells the template to read the
# destination first -- confirm in twoRegCondenseInst.
2919 vpadalCode = '''
2920 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2921 '''
2922 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2923 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2924
# vcls: count leading sign bits. After discarding the top bit with one
# left shift, the loop counts how many following bits still have the same
# sign as the original value (negative branch: while the shifted value is
# still < 0; non-negative branch: while it is still >= 0). The count is
# capped at element-bits - 1. Registered for signedTypes.
2925 vclsCode = '''
2926 unsigned count = 0;
2927 if (srcElem1 < 0) {
2928 srcElem1 <<= 1;
2929 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2930 count++;
2931 srcElem1 <<= 1;
2932 }
2933 } else {
2934 srcElem1 <<= 1;
2935 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2936 count++;
2937 srcElem1 <<= 1;
2938 }
2939 }
2940 destElem = count;
2941 '''
2942 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2943 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2944
# vclz: count leading zeros. The element is shifted left until it tests
# negative (i.e. its top bit is set) or all element bits have been
# consumed; the number of shifts is the result. The snippet relies on the
# signed element types it is registered with for the ">= 0" top-bit test.
2945 vclzCode = '''
2946 unsigned count = 0;
2947 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2948 count++;
2949 srcElem1 <<= 1;
2950 }
2951 destElem = count;
2952 '''
2953 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2954 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2955
# vcnt: population count. Adds the low bit to the count and shifts right
# until the element is zero (or element-bits iterations have run).
2956 vcntCode = '''
2957 unsigned count = 0;
2958 while (srcElem1 && count < sizeof(Element) * 8) {
2959 count += srcElem1 & 0x1;
2960 srcElem1 >>= 1;
2961 }
2962 destElem = count;
2963 '''
2964
2965 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2966 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2967
# vmvn: bitwise NOT of each element. Registered for uint64_t only.
2968 vmvnCode = '''
2969 destElem = ~srcElem1;
2970 '''
2971 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2972 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2973
# vqabs: saturating absolute value. When the source equals the value with
# only the top bit set (the one input whose negation does not fit), the
# result is ~srcElem1 and fpscr.qc is set; otherwise the plain absolute
# value is produced. The FPSCR copy is read from and written to FpscrQc.
2974 vqabsCode = '''
2975 FPSCR fpscr = (FPSCR) FpscrQc;
2976 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2977 fpscr.qc = 1;
2978 destElem = ~srcElem1;
2979 } else if (srcElem1 < 0) {
2980 destElem = -srcElem1;
2981 } else {
2982 destElem = srcElem1;
2983 }
2984 FpscrQc = fpscr;
2985 '''
2986 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2987 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2988
# vqneg: saturating negate. The value with only the top bit set yields
# ~srcElem1 and sets fpscr.qc; every other input is simply negated.
2989 vqnegCode = '''
2990 FPSCR fpscr = (FPSCR) FpscrQc;
2991 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2992 fpscr.qc = 1;
2993 destElem = ~srcElem1;
2994 } else {
2995 destElem = -srcElem1;
2996 }
2997 FpscrQc = fpscr;
2998 '''
2999 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3000 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3001
# vabs, integer form: non-saturating absolute value (no FPSCR access).
3002 vabsCode = '''
3003 if (srcElem1 < 0) {
3004 destElem = -srcElem1;
3005 } else {
3006 destElem = srcElem1;
3007 }
3008 '''
3009
3010 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3011 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs, floating-point form: the float is viewed as a uint32_t through a
# union and ANDed with mask(sizeof(Element) * 8 - 1).
# NOTE(review): mask(n) presumably yields the low n bits set, so this
# clears the sign bit -- confirm against the mask() helper.
3012 vabsfpCode = '''
3013 union
3014 {
3015 uint32_t i;
3016 float f;
3017 } cStruct;
3018 cStruct.f = srcReg1;
3019 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3020 destReg = cStruct.f;
3021 '''
3022 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3023 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3024
# vneg: plain negation, first for signed integer elements, then for float
# registers. Neither snippet touches the FPSCR.
3025 vnegCode = '''
3026 destElem = -srcElem1;
3027 '''
3028 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3029 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3030 vnegfpCode = '''
3031 destReg = -srcReg1;
3032 '''
3033 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3034 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3035
# vcgt against zero, integer form: mask(element bits) when srcElem1 > 0,
# otherwise 0.
3036 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3037 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3038 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
# vcgt against zero, floating-point form: binaryOp() applies vcgtFunc to
# srcReg1 and 0.0. The result register is -1 when res == 0 and 0
# otherwise; res == 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgtFunc, which
# is defined elsewhere -- confirm there.
3039 vcgtfpCode = '''
3040 FPSCR fpscr = (FPSCR) FpscrExc;
3041 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3042 true, true, VfpRoundNearest);
3043 destReg = (res == 0) ? -1 : 0;
3044 if (res == 2.0)
3045 fpscr.ioc = 1;
3046 FpscrExc = fpscr;
3047 '''
3048 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3049 2, vcgtfpCode, toInt = True)
3050 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3051 4, vcgtfpCode, toInt = True)
3052
# vcge against zero, integer form: mask(element bits) when srcElem1 >= 0,
# otherwise 0.
3053 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3054 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3055 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
# vcge against zero, floating-point form: same structure as the other fp
# zero-compares, using vcgeFunc. -1 when res == 0, else 0; res == 2.0
# sets fpscr.ioc.
3056 vcgefpCode = '''
3057 FPSCR fpscr = (FPSCR) FpscrExc;
3058 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3059 true, true, VfpRoundNearest);
3060 destReg = (res == 0) ? -1 : 0;
3061 if (res == 2.0)
3062 fpscr.ioc = 1;
3063 FpscrExc = fpscr;
3064 '''
3065 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3066 2, vcgefpCode, toInt = True)
3067 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3068 4, vcgefpCode, toInt = True)
3069
# vceq against zero, integer form: mask(element bits) when srcElem1 == 0,
# otherwise 0.
3070 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3071 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3072 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
# vceq against zero, floating-point form: uses vceqFunc through
# binaryOp(). -1 when res == 0, else 0; res == 2.0 sets fpscr.ioc.
3073 vceqfpCode = '''
3074 FPSCR fpscr = (FPSCR) FpscrExc;
3075 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3076 true, true, VfpRoundNearest);
3077 destReg = (res == 0) ? -1 : 0;
3078 if (res == 2.0)
3079 fpscr.ioc = 1;
3080 FpscrExc = fpscr;
3081 '''
3082 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3083 2, vceqfpCode, toInt = True)
3084 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3085 4, vceqfpCode, toInt = True)
3086
# vcle against zero, integer form: mask(element bits) when srcElem1 <= 0,
# otherwise 0.
3087 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3088 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3089 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
# vcle against zero, floating-point form: uses vcleFunc through
# binaryOp(). -1 when res == 0, else 0; res == 2.0 sets fpscr.ioc.
3090 vclefpCode = '''
3091 FPSCR fpscr = (FPSCR) FpscrExc;
3092 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3093 true, true, VfpRoundNearest);
3094 destReg = (res == 0) ? -1 : 0;
3095 if (res == 2.0)
3096 fpscr.ioc = 1;
3097 FpscrExc = fpscr;
3098 '''
3099 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3100 2, vclefpCode, toInt = True)
3101 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3102 4, vclefpCode, toInt = True)
3103
# vclt against zero, integer form: mask(element bits) when srcElem1 < 0,
# otherwise 0.
3104 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3105 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3106 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
# vclt against zero, floating-point form: uses vcltFunc through
# binaryOp(). -1 when res == 0, else 0; res == 2.0 sets fpscr.ioc.
3107 vcltfpCode = '''
3108 FPSCR fpscr = (FPSCR) FpscrExc;
3109 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3110 true, true, VfpRoundNearest);
3111 destReg = (res == 0) ? -1 : 0;
3112 if (res == 2.0)
3113 fpscr.ioc = 1;
3114 FpscrExc = fpscr;
3115 '''
3116 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3117 2, vcltfpCode, toInt = True)
3118 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3119 4, vcltfpCode, toInt = True)
3120
# vswp: swaps srcReg1.regs[r] and destReg.regs[r] for every r below
# rCount, using a FloatRegBits temporary. Both operands are written.
# NOTE(review): rCount is supplied by the twoRegMiscScramble template;
# presumably it equals the 2 / 4 argument passed below -- confirm.
3121 vswpCode = '''
3122 FloatRegBits mid;
3123 for (unsigned r = 0; r < rCount; r++) {
3124 mid = srcReg1.regs[r];
3125 srcReg1.regs[r] = destReg.regs[r];
3126 destReg.regs[r] = mid;
3127 }
3128 '''
3129 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3130 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3131
# vtrn snippet: for every even index i below eCount, exchanges
# srcReg1.elements[i] with destReg.elements[i + 1]. Both operands are
# written. The registration calls for this snippet are not part of this
# section.
3132 vtrnCode = '''
3133 Element mid;
3134 for (unsigned i = 0; i < eCount; i += 2) {
3135 mid = srcReg1.elements[i];
3136 srcReg1.elements[i] = destReg.elements[i + 1];
3137 destReg.elements[i + 1] = mid;
3138 }
3139 '''
2114
# vqdmulh: saturating doubling multiply, high half. The doubled product is
# computed in int64_t and shifted right by the element width. If both
# operands equal the value with only the top bit set, the result is
# replaced by ~srcElem1 and fpscr.qc is set.
2115 vqdmulhCode = '''
2116 FPSCR fpscr = (FPSCR) FpscrQc;
2117 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118 (sizeof(Element) * 8);
2119 if (srcElem1 == srcElem2 &&
2120 srcElem1 == (Element)((Element)1 <<
2121 (sizeof(Element) * 8 - 1))) {
2122 destElem = ~srcElem1;
2123 fpscr.qc = 1;
2124 }
2125 FpscrQc = fpscr;
2126 '''
2127 threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2128 threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2129
# vqrdmulh: rounding variant of vqdmulh. A rounding constant of
# 1 << (element bits - 1) is added to the doubled int64_t product before
# the shift by the element width. For the operand pairs
# (maxNeg, maxNeg), (halfNeg, maxNeg) and (maxNeg, halfNeg) the result is
# overridden -- mask(bits - 1) if the computed value was negative,
# otherwise 1 << (bits - 1) -- and fpscr.qc is set.
2130 vqrdmulhCode = '''
2131 FPSCR fpscr = (FPSCR) FpscrQc;
2132 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2133 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2134 (sizeof(Element) * 8);
2135 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2136 Element halfNeg = maxNeg / 2;
2137 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2138 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2139 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2140 if (destElem < 0) {
2141 destElem = mask(sizeof(Element) * 8 - 1);
2142 } else {
2143 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2144 }
2145 fpscr.qc = 1;
2146 }
2147 FpscrQc = fpscr;
2148 '''
2149 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2150 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2151 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2152 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2153
# vmax, floating-point: processNans() is tried first and reports through
# `done` whether it produced the result. When it did not, the maximum is
# computed with binaryOp(..., fpMaxS, ...). When it did, fpscr.idc is set
# if flushToZero(srcReg1, srcReg2) returns true.
2154 vmaxfpCode = '''
2155 FPSCR fpscr = (FPSCR) FpscrExc;
2156 bool done;
2157 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2158 if (!done) {
2159 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2160 true, true, VfpRoundNearest);
2161 } else if (flushToZero(srcReg1, srcReg2)) {
2162 fpscr.idc = 1;
2163 }
2164 FpscrExc = fpscr;
2165 '''
2166 threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2167 threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2168
# vmin, floating-point: same structure as vmaxfpCode with fpMinS as the
# operation passed to binaryOp().
2169 vminfpCode = '''
2170 FPSCR fpscr = (FPSCR) FpscrExc;
2171 bool done;
2172 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2173 if (!done) {
2174 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2175 true, true, VfpRoundNearest);
2176 } else if (flushToZero(srcReg1, srcReg2)) {
2177 fpscr.idc = 1;
2178 }
2179 FpscrExc = fpscr;
2180 '''
2181 threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2182 threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2183
# Pairwise fp max/min: vpmax and vpmin reuse the vmaxfpCode / vminfpCode
# snippets; only the pairwise=True flag passed to threeEqualRegInstFp
# differs from the element-wise registrations.
2184 threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2185 2, vmaxfpCode, pairwise=True)
2186 threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2187 4, vmaxfpCode, pairwise=True)
2188
2189 threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2190 2, vminfpCode, pairwise=True)
2191 threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2192 4, vminfpCode, pairwise=True)
2193
# vadd, floating-point: destReg = binaryOp(fpscr, srcReg1, srcReg2,
# fpAddS, ...); the FPSCR copy is written back to FpscrExc.
2194 vaddfpCode = '''
2195 FPSCR fpscr = (FPSCR) FpscrExc;
2196 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2197 true, true, VfpRoundNearest);
2198 FpscrExc = fpscr;
2199 '''
2200 threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2201 threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2202
# vpadd reuses the same snippet with pairwise=True.
2203 threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2204 2, vaddfpCode, pairwise=True)
2205 threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2206 4, vaddfpCode, pairwise=True)
2207
# vsub, floating-point: binaryOp() with fpSubS on srcReg1 and srcReg2.
2208 vsubfpCode = '''
2209 FPSCR fpscr = (FPSCR) FpscrExc;
2210 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2211 true, true, VfpRoundNearest);
2212 FpscrExc = fpscr;
2213 '''
2214 threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2215 threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2216
# vmul, floating-point: binaryOp() with fpMulS on srcReg1 and srcReg2.
2217 vmulfpCode = '''
2218 FPSCR fpscr = (FPSCR) FpscrExc;
2219 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2220 true, true, VfpRoundNearest);
2221 FpscrExc = fpscr;
2222 '''
2223 threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2224 threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2225
# vmla, floating-point: two separate binaryOp() steps -- the product
# srcReg1 * srcReg2 (fpMulS) is formed first, then added to the existing
# destReg (fpAddS). Both steps update the same fpscr copy.
2226 vmlafpCode = '''
2227 FPSCR fpscr = (FPSCR) FpscrExc;
2228 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2229 true, true, VfpRoundNearest);
2230 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2231 true, true, VfpRoundNearest);
2232 FpscrExc = fpscr;
2233 '''
2234 threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2235 threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2236
# vmls, floating-point: the product srcReg1 * srcReg2 (fpMulS) is
# subtracted from the existing destReg (fpSubS with destReg as the first
# operand).
2237 vmlsfpCode = '''
2238 FPSCR fpscr = (FPSCR) FpscrExc;
2239 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2240 true, true, VfpRoundNearest);
2241 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2242 true, true, VfpRoundNearest);
2243 FpscrExc = fpscr;
2244 '''
2245 threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2246 threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2247
# vcgt, floating-point, register-register form: binaryOp() applies
# vcgtFunc to srcReg1 and srcReg2. The result register is -1 when
# res == 0 and 0 otherwise; res == 2.0 additionally sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgtFunc, which
# is defined elsewhere -- confirm there.
2248 vcgtfpCode = '''
2249 FPSCR fpscr = (FPSCR) FpscrExc;
2250 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2251 true, true, VfpRoundNearest);
2252 destReg = (res == 0) ? -1 : 0;
2253 if (res == 2.0)
2254 fpscr.ioc = 1;
2255 FpscrExc = fpscr;
2256 '''
2257 threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2258 2, vcgtfpCode, toInt = True)
2259 threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2260 4, vcgtfpCode, toInt = True)
2261
# vcge, floating-point, register-register form: same structure as the
# vcgt form, using vcgeFunc.
2262 vcgefpCode = '''
2263 FPSCR fpscr = (FPSCR) FpscrExc;
2264 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2265 true, true, VfpRoundNearest);
2266 destReg = (res == 0) ? -1 : 0;
2267 if (res == 2.0)
2268 fpscr.ioc = 1;
2269 FpscrExc = fpscr;
2270 '''
2271 threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2272 2, vcgefpCode, toInt = True)
2273 threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2274 4, vcgefpCode, toInt = True)
2275
# vacgt, floating-point: same compare structure, using vacgtFunc.
# NOTE(review): presumably vacgtFunc compares absolute values (per the
# "vacgt" mnemonic) -- the function is defined elsewhere, confirm there.
2276 vacgtfpCode = '''
2277 FPSCR fpscr = (FPSCR) FpscrExc;
2278 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2279 true, true, VfpRoundNearest);
2280 destReg = (res == 0) ? -1 : 0;
2281 if (res == 2.0)
2282 fpscr.ioc = 1;
2283 FpscrExc = fpscr;
2284 '''
2285 threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2286 2, vacgtfpCode, toInt = True)
2287 threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2288 4, vacgtfpCode, toInt = True)
2289
# vacge, floating-point: same compare structure, using vacgeFunc.
2290 vacgefpCode = '''
2291 FPSCR fpscr = (FPSCR) FpscrExc;
2292 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2293 true, true, VfpRoundNearest);
2294 destReg = (res == 0) ? -1 : 0;
2295 if (res == 2.0)
2296 fpscr.ioc = 1;
2297 FpscrExc = fpscr;
2298 '''
2299 threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2300 2, vacgefpCode, toInt = True)
2301 threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2302 4, vacgefpCode, toInt = True)
2303
# vceq, floating-point, register-register form: same compare structure,
# using vceqFunc.
2304 vceqfpCode = '''
2305 FPSCR fpscr = (FPSCR) FpscrExc;
2306 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2307 true, true, VfpRoundNearest);
2308 destReg = (res == 0) ? -1 : 0;
2309 if (res == 2.0)
2310 fpscr.ioc = 1;
2311 FpscrExc = fpscr;
2312 '''
2313 threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2314 2, vceqfpCode, toInt = True)
2315 threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2316 4, vceqfpCode, toInt = True)
2317
# vrecps: binaryOp() with fpRecpsS on srcReg1 and srcReg2; the arithmetic
# itself lives in fpRecpsS, which is defined elsewhere.
2318 vrecpsCode = '''
2319 FPSCR fpscr = (FPSCR) FpscrExc;
2320 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2321 true, true, VfpRoundNearest);
2322 FpscrExc = fpscr;
2323 '''
2324 threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2325 threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2326
# vrsqrts: binaryOp() with fpRSqrtsS on srcReg1 and srcReg2; the
# arithmetic itself lives in fpRSqrtsS, which is defined elsewhere.
2327 vrsqrtsCode = '''
2328 FPSCR fpscr = (FPSCR) FpscrExc;
2329 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2330 true, true, VfpRoundNearest);
2331 FpscrExc = fpscr;
2332 '''
2333 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2334 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2335
# vabd, floating-point: the difference srcReg1 - srcReg2 is computed with
# binaryOp(..., fpSubS, ...) and its absolute value (fabs) is stored.
2336 vabdfpCode = '''
2337 FPSCR fpscr = (FPSCR) FpscrExc;
2338 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2339 true, true, VfpRoundNearest);
2340 destReg = fabs(mid);
2341 FpscrExc = fpscr;
2342 '''
2343 threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2344 threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2345
# Additional registrations that reuse existing snippets through the
# twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst helpers. The
# integer snippets referenced here (vmlaCode, vmlalCode, vmlsCode,
# vmlslCode, vmulCode, vmullCode, vqdmullCode, vqdmlalCode, vqdmlslCode)
# are defined outside this section.
# NOTE(review): the "s" suffix in the class names presumably marks the
# by-scalar instruction forms -- confirm against the helper definitions.
# NOTE(review): vmla is registered with unsignedTypes while vmls and vmul
# use allTypes -- verify this difference is intentional.
2346 twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2347 twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2348 twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2349 twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2350 twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2351
2352 twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2353 twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2354 twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2355 twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2356 twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2357
2358 twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2359 twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2360 twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2361 twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2362 twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2363
2364 twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2365 twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2366 twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2367 twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2368 twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2369 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2370 "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2371 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2372 "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2373
# vshr: shift right by immediate. A shift count of at least the operand
# width cannot be expressed as a C++ shift, so it is handled explicitly:
# the result is -1 when ltz(srcElem1) is true and 0 otherwise. Smaller
# counts use the native >> operator.
2374 vshrCode = '''
2375 if (imm >= sizeof(srcElem1) * 8) {
2376 if (ltz(srcElem1))
2377 destElem = -1;
2378 else
2379 destElem = 0;
2380 } else {
2381 destElem = srcElem1 >> imm;
2382 }
2383 '''
2384 twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2385 twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
2387 vsraCode = '''
2388 Element mid;;
2389 if (imm >= sizeof(srcElem1) * 8) {
2390 mid = ltz(srcElem1) ? -1 : 0;
2391 } else {
2392 mid = srcElem1 >> imm;
2393 if (ltz(srcElem1) && !ltz(mid)) {
2394 mid |= -(mid & ((Element)1 <<
2395 (sizeof(Element) * 8 - 1 - imm)));
2396 }
2397 }
2398 destElem += mid;
2399 '''
2400 twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2401 twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2402
# vrshr: rounding shift right by immediate. For a non-zero imm no larger
# than the operand width, the bit just below the new LSB (bit imm - 1) is
# added to the shifted value. The shift is split into (imm - 1) and 1 so
# the single-step count never reaches the operand width. imm larger than
# the width gives 0; imm == 0 copies the source.
2403 vrshrCode = '''
2404 if (imm > sizeof(srcElem1) * 8) {
2405 destElem = 0;
2406 } else if (imm) {
2407 Element rBit = bits(srcElem1, imm - 1);
2408 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2409 } else {
2410 destElem = srcElem1;
2411 }
2412 '''
2413 twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2414 twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2415
# vrsra: rounding shift right and accumulate. Same three cases as
# vrshrCode, but each result is added to destElem instead of replacing it
# (the over-wide case adds 0, i.e. leaves destElem unchanged).
2416 vrsraCode = '''
2417 if (imm > sizeof(srcElem1) * 8) {
2418 destElem += 0;
2419 } else if (imm) {
2420 Element rBit = bits(srcElem1, imm - 1);
2421 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2422 } else {
2423 destElem += srcElem1;
2424 }
2425 '''
2426 twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2427 twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2428
# vsri: shift right and insert. The shifted source is ORed with destElem
# masked by ~mask(element bits - imm), so the part of the old destination
# not covered by that mask survives. A shift of the full width or more
# leaves destElem as it was.
# NOTE(review): the self-assignment presumably exists so destElem counts
# as written on that path -- confirm.
2429 vsriCode = '''
2430 if (imm >= sizeof(Element) * 8)
2431 destElem = destElem;
2432 else
2433 destElem = (srcElem1 >> imm) |
2434 (destElem & ~mask(sizeof(Element) * 8 - imm));
2435 '''
2436 twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2437 twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2438
# vshl: shift left by immediate. When imm is at least the element width
# the shift is performed as (bits - 1) followed by 1, avoiding a single
# shift by the full width; otherwise the native << is used.
2439 vshlCode = '''
2440 if (imm >= sizeof(Element) * 8)
2441 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2442 else
2443 destElem = srcElem1 << imm;
2444 '''
2445 twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2446 twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2447
# vsli: shift left and insert. The shifted source is ORed with
# destElem & mask(imm), keeping the part of the old destination selected
# by mask(imm). A shift of the full width or more leaves destElem as it
# was.
2448 vsliCode = '''
2449 if (imm >= sizeof(Element) * 8)
2450 destElem = destElem;
2451 else
2452 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2453 '''
2454 twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2455 twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2456
# vqshl (signed): saturating shift left by immediate.
#  - imm >= element width: any non-zero source saturates (top-bit-only
#    value, inverted for positive sources) and sets fpscr.qc.
#  - 0 < imm < width: the top imm + 1 bits of the source are inspected;
#    unless they are all zero or equal mask(imm + 1), the result
#    saturates the same way and fpscr.qc is set.
#  - imm == 0: the source is copied.
2457 vqshlCode = '''
2458 FPSCR fpscr = (FPSCR) FpscrQc;
2459 if (imm >= sizeof(Element) * 8) {
2460 if (srcElem1 != 0) {
2461 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2462 if (srcElem1 > 0)
2463 destElem = ~destElem;
2464 fpscr.qc = 1;
2465 } else {
2466 destElem = 0;
2467 }
2468 } else if (imm) {
2469 destElem = (srcElem1 << imm);
2470 uint64_t topBits = bits((uint64_t)srcElem1,
2471 sizeof(Element) * 8 - 1,
2472 sizeof(Element) * 8 - 1 - imm);
2473 if (topBits != 0 && topBits != mask(imm + 1)) {
2474 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2475 if (srcElem1 > 0)
2476 destElem = ~destElem;
2477 fpscr.qc = 1;
2478 }
2479 } else {
2480 destElem = srcElem1;
2481 }
2482 FpscrQc = fpscr;
2483 '''
2484 twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2485 twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2486
# vqshlu (unsigned source): saturating shift left by immediate.
# Saturation writes mask(element bits) and sets fpscr.qc. It happens when
# imm >= width and the source is non-zero, or when any of the top imm
# source bits is set.
2487 vqshluCode = '''
2488 FPSCR fpscr = (FPSCR) FpscrQc;
2489 if (imm >= sizeof(Element) * 8) {
2490 if (srcElem1 != 0) {
2491 destElem = mask(sizeof(Element) * 8);
2492 fpscr.qc = 1;
2493 } else {
2494 destElem = 0;
2495 }
2496 } else if (imm) {
2497 destElem = (srcElem1 << imm);
2498 uint64_t topBits = bits((uint64_t)srcElem1,
2499 sizeof(Element) * 8 - 1,
2500 sizeof(Element) * 8 - imm);
2501 if (topBits != 0) {
2502 destElem = mask(sizeof(Element) * 8);
2503 fpscr.qc = 1;
2504 }
2505 } else {
2506 destElem = srcElem1;
2507 }
2508 FpscrQc = fpscr;
2509 '''
2510 twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2511 twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2512
# vqshlu with signed source elements (classes NVqshlus*): in every imm
# case a negative source produces 0 and sets fpscr.qc. A positive source
# saturates to mask(element bits), with qc set, when imm >= width or when
# any of the top imm source bits is set; otherwise the shifted (or, for
# imm == 0, unshifted) source is stored.
2513 vqshlusCode = '''
2514 FPSCR fpscr = (FPSCR) FpscrQc;
2515 if (imm >= sizeof(Element) * 8) {
2516 if (srcElem1 < 0) {
2517 destElem = 0;
2518 fpscr.qc = 1;
2519 } else if (srcElem1 > 0) {
2520 destElem = mask(sizeof(Element) * 8);
2521 fpscr.qc = 1;
2522 } else {
2523 destElem = 0;
2524 }
2525 } else if (imm) {
2526 destElem = (srcElem1 << imm);
2527 uint64_t topBits = bits((uint64_t)srcElem1,
2528 sizeof(Element) * 8 - 1,
2529 sizeof(Element) * 8 - imm);
2530 if (srcElem1 < 0) {
2531 destElem = 0;
2532 fpscr.qc = 1;
2533 } else if (topBits != 0) {
2534 destElem = mask(sizeof(Element) * 8);
2535 fpscr.qc = 1;
2536 }
2537 } else {
2538 if (srcElem1 < 0) {
2539 fpscr.qc = 1;
2540 destElem = 0;
2541 } else {
2542 destElem = srcElem1;
2543 }
2544 }
2545 FpscrQc = fpscr;
2546 '''
2547 twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2548 twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2549
# vshrn: shift right by immediate and narrow (no rounding, no
# saturation). A shift count of the full source width or more gives 0.
2550 vshrnCode = '''
2551 if (imm >= sizeof(srcElem1) * 8) {
2552 destElem = 0;
2553 } else {
2554 destElem = srcElem1 >> imm;
2555 }
2556 '''
2557 twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2558
# vrshrn: rounding shift right and narrow. Bit imm - 1 of the source is
# added to the shifted value; the shift is split into (imm - 1) and 1.
# imm larger than the source width gives 0; imm == 0 copies the source.
2559 vrshrnCode = '''
2560 if (imm > sizeof(srcElem1) * 8) {
2561 destElem = 0;
2562 } else if (imm) {
2563 Element rBit = bits(srcElem1, imm - 1);
2564 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2565 } else {
2566 destElem = srcElem1;
2567 }
2568 '''
2569 twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2570
# vqshrn (signed): saturating shift right and narrow. The shifted value is
# built in a BigElement and sign-extended manually from bit
# (BigElement bits - 1 - imm). If it does not survive a round trip through
# Element, the result saturates to mask(element bits - 1) (inverted for
# negative sources) and fpscr.qc is set. For imm larger than the source
# width, qc is set unless the source is 0 or -1, and the result is 0.
2571 vqshrnCode = '''
2572 FPSCR fpscr = (FPSCR) FpscrQc;
2573 if (imm > sizeof(srcElem1) * 8) {
2574 if (srcElem1 != 0 && srcElem1 != -1)
2575 fpscr.qc = 1;
2576 destElem = 0;
2577 } else if (imm) {
2578 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2579 mid |= -(mid & ((BigElement)1 <<
2580 (sizeof(BigElement) * 8 - 1 - imm)));
2581 if (mid != (Element)mid) {
2582 destElem = mask(sizeof(Element) * 8 - 1);
2583 if (srcElem1 < 0)
2584 destElem = ~destElem;
2585 fpscr.qc = 1;
2586 } else {
2587 destElem = mid;
2588 }
2589 } else {
2590 destElem = srcElem1;
2591 }
2592 FpscrQc = fpscr;
2593 '''
2594 twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2595
# vqshrun registered for unsigned element types (class NVqshrun):
# saturating shift right and narrow. If the shifted BigElement value does
# not survive a round trip through Element, the result saturates to
# mask(element bits) and fpscr.qc is set.
2596 vqshrunCode = '''
2597 FPSCR fpscr = (FPSCR) FpscrQc;
2598 if (imm > sizeof(srcElem1) * 8) {
2599 if (srcElem1 != 0)
2600 fpscr.qc = 1;
2601 destElem = 0;
2602 } else if (imm) {
2603 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2604 if (mid != (Element)mid) {
2605 destElem = mask(sizeof(Element) * 8);
2606 fpscr.qc = 1;
2607 } else {
2608 destElem = mid;
2609 }
2610 } else {
2611 destElem = srcElem1;
2612 }
2613 FpscrQc = fpscr;
2614 '''
2615 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2616 "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2617
# vqshrun registered for signed element types (class NVqshruns): the
# shifted value saturates when any bit at or above the Element width is
# set in the BigElement intermediate -- to 0 for negative sources and to
# mask(element bits) otherwise -- and fpscr.qc is set.
2618 vqshrunsCode = '''
2619 FPSCR fpscr = (FPSCR) FpscrQc;
2620 if (imm > sizeof(srcElem1) * 8) {
2621 if (srcElem1 != 0)
2622 fpscr.qc = 1;
2623 destElem = 0;
2624 } else if (imm) {
2625 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2626 if (bits(mid, sizeof(BigElement) * 8 - 1,
2627 sizeof(Element) * 8) != 0) {
2628 if (srcElem1 < 0) {
2629 destElem = 0;
2630 } else {
2631 destElem = mask(sizeof(Element) * 8);
2632 }
2633 fpscr.qc = 1;
2634 } else {
2635 destElem = mid;
2636 }
2637 } else {
2638 destElem = srcElem1;
2639 }
2640 FpscrQc = fpscr;
2641 '''
2642 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2643 "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2644
# vqrshrn (signed): saturating rounding shift right and narrow. The
# rounding bit is the LSB of (srcElem1 >> (imm - 1)); after the final
# one-bit shift and manual sign extension it is added back. A value that
# does not survive a round trip through Element saturates to
# mask(element bits - 1) (inverted for negative sources) with fpscr.qc
# set. The imm == 0 branch applies the same saturation test directly to
# the source.
2645 vqrshrnCode = '''
2646 FPSCR fpscr = (FPSCR) FpscrQc;
2647 if (imm > sizeof(srcElem1) * 8) {
2648 if (srcElem1 != 0 && srcElem1 != -1)
2649 fpscr.qc = 1;
2650 destElem = 0;
2651 } else if (imm) {
2652 BigElement mid = (srcElem1 >> (imm - 1));
2653 uint64_t rBit = mid & 0x1;
2654 mid >>= 1;
2655 mid |= -(mid & ((BigElement)1 <<
2656 (sizeof(BigElement) * 8 - 1 - imm)));
2657 mid += rBit;
2658 if (mid != (Element)mid) {
2659 destElem = mask(sizeof(Element) * 8 - 1);
2660 if (srcElem1 < 0)
2661 destElem = ~destElem;
2662 fpscr.qc = 1;
2663 } else {
2664 destElem = mid;
2665 }
2666 } else {
2667 if (srcElem1 != (Element)srcElem1) {
2668 destElem = mask(sizeof(Element) * 8 - 1);
2669 if (srcElem1 < 0)
2670 destElem = ~destElem;
2671 fpscr.qc = 1;
2672 } else {
2673 destElem = srcElem1;
2674 }
2675 }
2676 FpscrQc = fpscr;
2677 '''
2678 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2679 "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2680
# C++ body registered as NVqrshrun for smallUnsignedTypes.  The wide source
# is shifted right by imm with the last bit shifted out (rBit) added back as
# rounding, then narrowed to Element.  A value that does not fit in Element
# saturates to the all-ones pattern mask(sizeof(Element) * 8) and sets
# fpscr.qc.  A shift larger than the source width yields 0 and sets qc for
# any non-zero source.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            // Unsigned saturation: clamp to the full-width mask, the
            // same value the shifted path uses, not the signed maximum.
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2710
# C++ body registered as NVqrshruns for smallSignedTypes.  The wide source
# is shifted right by imm with the last bit shifted out (rBit) added back as
# rounding.  If any bit above the Element width is set in the result, the
# destination is clamped to 0 for a negative source or to
# mask(sizeof(Element) * 8) otherwise, and fpscr.qc is set.  With imm == 0 a
# negative source is clamped to 0 and sets qc; other values are copied.
2711 vqrshrunsCode = '''
2712 FPSCR fpscr = (FPSCR) FpscrQc;
2713 if (imm > sizeof(srcElem1) * 8) {
2714 if (srcElem1 != 0)
2715 fpscr.qc = 1;
2716 destElem = 0;
2717 } else if (imm) {
2718 BigElement mid = (srcElem1 >> (imm - 1));
2719 uint64_t rBit = mid & 0x1;
2720 mid >>= 1;
2721 mid |= -(mid & ((BigElement)1 <<
2722 (sizeof(BigElement) * 8 - 1 - imm)));
2723 mid += rBit;
2724 if (bits(mid, sizeof(BigElement) * 8 - 1,
2725 sizeof(Element) * 8) != 0) {
2726 if (srcElem1 < 0) {
2727 destElem = 0;
2728 } else {
2729 destElem = mask(sizeof(Element) * 8);
2730 }
2731 fpscr.qc = 1;
2732 } else {
2733 destElem = mid;
2734 }
2735 } else {
2736 if (srcElem1 < 0) {
2737 fpscr.qc = 1;
2738 destElem = 0;
2739 } else {
2740 destElem = srcElem1;
2741 }
2742 }
2743 FpscrQc = fpscr;
2744 '''
2745 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2746 "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2747
# C++ body for NVshll: the source element is cast to BigElement and shifted
# left by imm; a shift count of at least the destination bit width gives 0.
2748 vshllCode = '''
2749 if (imm >= sizeof(destElem) * 8) {
2750 destElem = 0;
2751 } else {
2752 destElem = (BigElement)srcElem1 << imm;
2753 }
2754 '''
2755 twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2756
# C++ body for NVmovl: a plain element copy, registered through
# twoRegLongShiftInst for smallTypes.
2757 vmovlCode = '''
2758 destElem = srcElem1;
2759 '''
2760 twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2761
# C++ body for NVcvt2ufxD/Q: sets fpscr.idc when flushToZero(srcElem1)
# reports true, then converts with vfpFpSToFixed(srcElem1, false, false, imm)
# between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers keeping the conversion between the two calls - confirm.
2762 vcvt2ufxCode = '''
2763 FPSCR fpscr = (FPSCR) FpscrExc;
2764 if (flushToZero(srcElem1))
2765 fpscr.idc = 1;
2766 VfpSavedState state = prepFpState(VfpRoundNearest);
2767 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2768 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2769 __asm__ __volatile__("" :: "m" (destReg));
2770 finishVfp(fpscr, state, true);
2771 FpscrExc = fpscr;
2772 '''
2773 twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2774 2, vcvt2ufxCode, toInt = True)
2775 twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2776 4, vcvt2ufxCode, toInt = True)
2777
# C++ body for NVcvt2sfxD/Q: sets fpscr.idc when flushToZero(srcElem1)
# reports true, then converts with vfpFpSToFixed(srcElem1, true, false, imm)
# between prepFpState(VfpRoundNearest) and finishVfp().
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers keeping the conversion between the two calls - confirm.
2778 vcvt2sfxCode = '''
2779 FPSCR fpscr = (FPSCR) FpscrExc;
2780 if (flushToZero(srcElem1))
2781 fpscr.idc = 1;
2782 VfpSavedState state = prepFpState(VfpRoundNearest);
2783 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2784 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2785 __asm__ __volatile__("" :: "m" (destReg));
2786 finishVfp(fpscr, state, true);
2787 FpscrExc = fpscr;
2788 '''
2789 twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2790 2, vcvt2sfxCode, toInt = True)
2791 twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2792 4, vcvt2sfxCode, toInt = True)
2793
# C++ body for NVcvtu2fpD/Q: converts srcReg1 with
# vfpUFixedToFpS(true, true, srcReg1, false, imm) between
# prepFpState(VfpRoundNearest) and finishVfp(); registered with fromInt = True.
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers keeping the conversion between the two calls - confirm.
2794 vcvtu2fpCode = '''
2795 FPSCR fpscr = (FPSCR) FpscrExc;
2796 VfpSavedState state = prepFpState(VfpRoundNearest);
2797 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2799 __asm__ __volatile__("" :: "m" (destElem));
2800 finishVfp(fpscr, state, true);
2801 FpscrExc = fpscr;
2802 '''
2803 twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2804 2, vcvtu2fpCode, fromInt = True)
2805 twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2806 4, vcvtu2fpCode, fromInt = True)
2807
# C++ body for NVcvts2fpD/Q: converts srcReg1 with
# vfpSFixedToFpS(true, true, srcReg1, false, imm) between
# prepFpState(VfpRoundNearest) and finishVfp(); registered with fromInt = True.
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers keeping the conversion between the two calls - confirm.
2808 vcvts2fpCode = '''
2809 FPSCR fpscr = (FPSCR) FpscrExc;
2810 VfpSavedState state = prepFpState(VfpRoundNearest);
2811 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2812 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2813 __asm__ __volatile__("" :: "m" (destElem));
2814 finishVfp(fpscr, state, true);
2815 FpscrExc = fpscr;
2816 '''
2817 twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2818 2, vcvts2fpCode, fromInt = True)
2819 twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2820 4, vcvts2fpCode, fromInt = True)
2821
# C++ body for NVcvts2h: reinterprets srcElem1 as a float via bitsToFp,
# sets fpscr.idc when flushToZero(srcFp1) reports true, and stores the result
# of vcvtFpSFpH(..., fpscr.ahp, srcFp1) in destElem.  Registered through
# twoRegNarrowMiscInst with element type uint16_t.
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers around the conversion - confirm.
2822 vcvts2hCode = '''
2823 FPSCR fpscr = (FPSCR) FpscrExc;
2824 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2825 if (flushToZero(srcFp1))
2826 fpscr.idc = 1;
2827 VfpSavedState state = prepFpState(VfpRoundNearest);
2828 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2829 : "m" (srcFp1), "m" (destElem));
2830 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2831 fpscr.ahp, srcFp1);
2832 __asm__ __volatile__("" :: "m" (destElem));
2833 finishVfp(fpscr, state, true);
2834 FpscrExc = fpscr;
2835 '''
2836 twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2837
# C++ body for NVcvth2s: passes srcElem1 to vcvtFpHFpS(fpscr, true,
# fpscr.ahp, srcElem1) and stores the bit pattern of the returned value
# (fpToBits) in destElem.  Registered through twoRegLongMiscInst with element
# type uint16_t.
# NOTE(review): the empty asm statements with memory operands look like
# compiler barriers around the conversion - confirm.
2838 vcvth2sCode = '''
2839 FPSCR fpscr = (FPSCR) FpscrExc;
2840 VfpSavedState state = prepFpState(VfpRoundNearest);
2841 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2842 : "m" (srcElem1), "m" (destElem));
2843 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2844 __asm__ __volatile__("" :: "m" (destElem));
2845 finishVfp(fpscr, state, true);
2846 FpscrExc = fpscr;
2847 '''
2848 twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2849
# C++ body for NVrsqrteD/Q: each uint32_t element becomes
# unsignedRSqrtEstimate(srcElem1).  Registered with register counts 2 and 4.
2850 vrsqrteCode = '''
2851 destElem = unsignedRSqrtEstimate(srcElem1);
2852 '''
2853 twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2854 twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2855
# C++ body for NVrsqrteDFp/QFp: sets fpscr.idc when flushToZero(srcReg1)
# reports true, then stores fprSqrtEstimate(fpscr, srcReg1) in destReg and
# writes fpscr back to FpscrExc.
2856 vrsqrtefpCode = '''
2857 FPSCR fpscr = (FPSCR) FpscrExc;
2858 if (flushToZero(srcReg1))
2859 fpscr.idc = 1;
2860 destReg = fprSqrtEstimate(fpscr, srcReg1);
2861 FpscrExc = fpscr;
2862 '''
2863 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2864 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2865
# C++ body for NVrecpeD/Q: each uint32_t element becomes
# unsignedRecipEstimate(srcElem1).  Registered with register counts 2 and 4.
2866 vrecpeCode = '''
2867 destElem = unsignedRecipEstimate(srcElem1);
2868 '''
2869 twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2870 twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2871
# C++ body for NVrecpeDFp/QFp: sets fpscr.idc when flushToZero(srcReg1)
# reports true, then stores fpRecipEstimate(fpscr, srcReg1) in destReg and
# writes fpscr back to FpscrExc.
2872 vrecpefpCode = '''
2873 FPSCR fpscr = (FPSCR) FpscrExc;
2874 if (flushToZero(srcReg1))
2875 fpscr.idc = 1;
2876 destReg = fpRecipEstimate(fpscr, srcReg1);
2877 FpscrExc = fpscr;
2878 '''
2879 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2880 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2881
# C++ bodies for the vrev16/vrev32/vrev64 instructions.  Each copies the
# source element and computes j = i ^ (groupSize - 1), where groupSize is the
# number of Elements in a 2-, 4- or 8-byte group ((1 << k) / sizeof(Element)).
# XOR-ing the index with groupSize - 1 mirrors it within its group.
# NOTE(review): j is presumably the destination element index consumed by
# the twoRegMiscInst template - confirm.
2882 vrev16Code = '''
2883 destElem = srcElem1;
2884 unsigned groupSize = ((1 << 1) / sizeof(Element));
2885 unsigned reverseMask = (groupSize - 1);
2886 j = i ^ reverseMask;
2887 '''
2888 twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2889 twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2890 vrev32Code = '''
2891 destElem = srcElem1;
2892 unsigned groupSize = ((1 << 2) / sizeof(Element));
2893 unsigned reverseMask = (groupSize - 1);
2894 j = i ^ reverseMask;
2895 '''
2896 twoRegMiscInst("vrev32", "NVrev32D",
2897 "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2898 twoRegMiscInst("vrev32", "NVrev32Q",
2899 "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2900 vrev64Code = '''
2901 destElem = srcElem1;
2902 unsigned groupSize = ((1 << 3) / sizeof(Element));
2903 unsigned reverseMask = (groupSize - 1);
2904 j = i ^ reverseMask;
2905 '''
2906 twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2907 twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2908
# C++ body for NVpaddlD/Q: both source elements are widened to BigElement
# before being added, and the sum is stored in destElem.
2909 vpaddlCode = '''
2910 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2911 '''
2912 twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2913 twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2914
# C++ body for NVpadalD/Q: both source elements are widened to BigElement,
# added, and the sum is accumulated into the existing destElem.  The trailing
# True argument is passed to twoRegCondenseInst for these accumulating forms.
2915 vpadalCode = '''
2916 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2917 '''
2918 twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2919 twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2920
# C++ body for NVclsD/Q: counts how many bits directly below the top bit
# carry the same value as the top bit.  The element is shifted left one bit at
# a time and the count stops at the first differing bit or after
# sizeof(Element) * 8 - 1 steps.
2921 vclsCode = '''
2922 unsigned count = 0;
2923 if (srcElem1 < 0) {
2924 srcElem1 <<= 1;
2925 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2926 count++;
2927 srcElem1 <<= 1;
2928 }
2929 } else {
2930 srcElem1 <<= 1;
2931 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2932 count++;
2933 srcElem1 <<= 1;
2934 }
2935 }
2936 destElem = count;
2937 '''
2938 twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2939 twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2940
# C++ body for NVclzD/Q: counts leading zero bits.  The signed element is
# shifted left while it is non-negative (top bit clear), for at most
# sizeof(Element) * 8 steps.
2941 vclzCode = '''
2942 unsigned count = 0;
2943 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2944 count++;
2945 srcElem1 <<= 1;
2946 }
2947 destElem = count;
2948 '''
2949 twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2950 twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2951
# C++ body for NVcntD/Q: counts the set bits of the element by adding its
# lowest bit and shifting right until the value reaches zero.
2952 vcntCode = '''
2953 unsigned count = 0;
2954 while (srcElem1 && count < sizeof(Element) * 8) {
2955 count += srcElem1 & 0x1;
2956 srcElem1 >>= 1;
2957 }
2958 destElem = count;
2959 '''
2960
2961 twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2962 twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2963
# C++ body for NVmvnD/Q: bitwise complement of the source element,
# processed as uint64_t elements.
2964 vmvnCode = '''
2965 destElem = ~srcElem1;
2966 '''
2967 twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2968 twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2969
# C++ body for NVqabsD/Q: absolute value with saturation.  When the source
# has only its top bit set (the most negative value), the result is
# ~srcElem1 and fpscr.qc is set; otherwise negative values are negated and
# non-negative values are copied.
2970 vqabsCode = '''
2971 FPSCR fpscr = (FPSCR) FpscrQc;
2972 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2973 fpscr.qc = 1;
2974 destElem = ~srcElem1;
2975 } else if (srcElem1 < 0) {
2976 destElem = -srcElem1;
2977 } else {
2978 destElem = srcElem1;
2979 }
2980 FpscrQc = fpscr;
2981 '''
2982 twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2983 twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2984
# C++ body for NVqnegD/Q: negation with saturation.  When the source has
# only its top bit set (the most negative value), the result is ~srcElem1
# and fpscr.qc is set; every other value is simply negated.
2985 vqnegCode = '''
2986 FPSCR fpscr = (FPSCR) FpscrQc;
2987 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2988 fpscr.qc = 1;
2989 destElem = ~srcElem1;
2990 } else {
2991 destElem = -srcElem1;
2992 }
2993 FpscrQc = fpscr;
2994 '''
2995 twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2996 twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2997
# C++ bodies for the vabs instructions.  The integer form negates negative
# elements and copies the rest (no saturation, no flag update).  The
# floating-point form views the float through a union with uint32_t and
# clears the top bit with mask(sizeof(Element) * 8 - 1).
2998 vabsCode = '''
2999 if (srcElem1 < 0) {
3000 destElem = -srcElem1;
3001 } else {
3002 destElem = srcElem1;
3003 }
3004 '''
3005
3006 twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3007 twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3008 vabsfpCode = '''
3009 union
3010 {
3011 uint32_t i;
3012 float f;
3013 } cStruct;
3014 cStruct.f = srcReg1;
3015 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3016 destReg = cStruct.f;
3017 '''
3018 twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3019 twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3020
# C++ bodies for the vneg instructions: plain negation of the integer
# element (signedTypes) and of the float register value.
3021 vnegCode = '''
3022 destElem = -srcElem1;
3023 '''
3024 twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3025 twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3026 vnegfpCode = '''
3027 destReg = -srcReg1;
3028 '''
3029 twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3030 twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3031
# C++ bodies for vcgt against zero.  The integer form writes an all-ones
# element when srcElem1 > 0 and 0 otherwise.  The floating-point form runs
# vcgtFunc through binaryOp with 0.0 as second operand; a result of 0 yields
# -1 in destReg, anything else yields 0, and a result of 2.0 sets fpscr.ioc.
3032 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3033 twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3034 twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3035 vcgtfpCode = '''
3036 FPSCR fpscr = (FPSCR) FpscrExc;
3037 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3038 true, true, VfpRoundNearest);
3039 destReg = (res == 0) ? -1 : 0;
3040 if (res == 2.0)
3041 fpscr.ioc = 1;
3042 FpscrExc = fpscr;
3043 '''
3044 twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3045 2, vcgtfpCode, toInt = True)
3046 twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3047 4, vcgtfpCode, toInt = True)
3048
# C++ bodies for vcge against zero.  The integer form writes an all-ones
# element when srcElem1 >= 0 and 0 otherwise.  The floating-point form runs
# vcgeFunc through binaryOp with 0.0 as second operand; a result of 0 yields
# -1 in destReg, anything else yields 0, and a result of 2.0 sets fpscr.ioc.
3049 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3050 twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3051 twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3052 vcgefpCode = '''
3053 FPSCR fpscr = (FPSCR) FpscrExc;
3054 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3055 true, true, VfpRoundNearest);
3056 destReg = (res == 0) ? -1 : 0;
3057 if (res == 2.0)
3058 fpscr.ioc = 1;
3059 FpscrExc = fpscr;
3060 '''
3061 twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3062 2, vcgefpCode, toInt = True)
3063 twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3064 4, vcgefpCode, toInt = True)
3065
# C++ bodies for vceq against zero.  The integer form writes an all-ones
# element when srcElem1 == 0 and 0 otherwise.  The floating-point form runs
# vceqFunc through binaryOp with 0.0 as second operand; a result of 0 yields
# -1 in destReg, anything else yields 0, and a result of 2.0 sets fpscr.ioc.
3066 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3067 twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3068 twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3069 vceqfpCode = '''
3070 FPSCR fpscr = (FPSCR) FpscrExc;
3071 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3072 true, true, VfpRoundNearest);
3073 destReg = (res == 0) ? -1 : 0;
3074 if (res == 2.0)
3075 fpscr.ioc = 1;
3076 FpscrExc = fpscr;
3077 '''
3078 twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3079 2, vceqfpCode, toInt = True)
3080 twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3081 4, vceqfpCode, toInt = True)
3082
# C++ bodies for vcle against zero.  The integer form writes an all-ones
# element when srcElem1 <= 0 and 0 otherwise.  The floating-point form runs
# vcleFunc through binaryOp with 0.0 as second operand; a result of 0 yields
# -1 in destReg, anything else yields 0, and a result of 2.0 sets fpscr.ioc.
3083 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3084 twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3085 twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3086 vclefpCode = '''
3087 FPSCR fpscr = (FPSCR) FpscrExc;
3088 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3089 true, true, VfpRoundNearest);
3090 destReg = (res == 0) ? -1 : 0;
3091 if (res == 2.0)
3092 fpscr.ioc = 1;
3093 FpscrExc = fpscr;
3094 '''
3095 twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3096 2, vclefpCode, toInt = True)
3097 twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3098 4, vclefpCode, toInt = True)
3099
# C++ bodies for vclt against zero.  The integer form writes an all-ones
# element when srcElem1 < 0 and 0 otherwise.  The floating-point form runs
# vcltFunc through binaryOp with 0.0 as second operand; a result of 0 yields
# -1 in destReg, anything else yields 0, and a result of 2.0 sets fpscr.ioc.
3100 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3101 twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3102 twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3103 vcltfpCode = '''
3104 FPSCR fpscr = (FPSCR) FpscrExc;
3105 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3106 true, true, VfpRoundNearest);
3107 destReg = (res == 0) ? -1 : 0;
3108 if (res == 2.0)
3109 fpscr.ioc = 1;
3110 FpscrExc = fpscr;
3111 '''
3112 twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3113 2, vcltfpCode, toInt = True)
3114 twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3115 4, vcltfpCode, toInt = True)
3116
# C++ body for NVswpD/Q: exchanges the first rCount register words of
# srcReg1 and destReg, using mid as the temporary.
3117 vswpCode = '''
3118 FloatRegBits mid;
3119 for (unsigned r = 0; r < rCount; r++) {
3120 mid = srcReg1.regs[r];
3121 srcReg1.regs[r] = destReg.regs[r];
3122 destReg.regs[r] = mid;
3123 }
3124 '''
3125 twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3126 twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3127
# C++ body for NVtrnD/Q: for every even index i, exchanges
# srcReg1.elements[i] with destReg.elements[i + 1].
# NOTE(review): NVtrnD and NVtrnQ are registered twice below, once with
# unsignedTypes and once with smallUnsignedTypes.  This looks like both
# sides of a revision diff ended up in the file - confirm which pair is
# intended and drop the other.  The loop reads elements[i + 1], so it needs
# at least two elements per register group.
3128 vtrnCode = '''
3129 Element mid;
3130 for (unsigned i = 0; i < eCount; i += 2) {
3131 mid = srcReg1.elements[i];
3132 srcReg1.elements[i] = destReg.elements[i + 1];
3133 destReg.elements[i + 1] = mid;
3134 }
3135 '''
3140 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3141 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3136 twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3137 smallUnsignedTypes, 2, vtrnCode)
3138 twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3139 smallUnsignedTypes, 4, vtrnCode)
3142
# C++ body for NVuzpD/Q.  mid keeps a copy of the original srcReg1.  After
# both loops destReg holds the even-indexed elements of the old destReg
# followed by the even-indexed elements of the old srcReg1, and srcReg1
# holds the odd-indexed elements of the old destReg followed by the
# odd-indexed elements of the old srcReg1.
3143 vuzpCode = '''
3144 Element mid[eCount];
3145 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3146 for (unsigned i = 0; i < eCount / 2; i++) {
3147 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3148 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3149 destReg.elements[i] = destReg.elements[2 * i];
3150 }
3151 for (unsigned i = 0; i < eCount / 2; i++) {
3152 destReg.elements[eCount / 2 + i] = mid[2 * i];
3153 }
3154 '''
3155 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3156 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3157
# C++ body for NVzipD/Q.  mid keeps a copy of the original destReg.  After
# both loops destReg holds the lower halves of the old destReg and old
# srcReg1 interleaved element by element, and srcReg1 holds the upper halves
# interleaved the same way.  The second loop reads
# srcReg1.elements[eCount / 2 + i] before any iteration overwrites it, so no
# copy of srcReg1 is needed.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3172
# C++ body for NVmovn: a plain element copy, registered through
# twoRegNarrowMiscInst for smallUnsignedTypes.
3173 vmovnCode = 'destElem = srcElem1;'
3174 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3175
# C++ body for NVdupD/Q: a plain element copy, registered through
# twoRegMiscScInst for smallUnsignedTypes with register counts 2 and 4.
3176 vdupCode = 'destElem = srcElem1;'
3177 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3178 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3179
# Generates an instruction class `Name` that fills every element of the
# destination vector with (Element)Op1 and writes the first rCount register
# words back to FpDestP<n>_uw.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   types   - element types; one NeonExecDeclare instantiation is emitted
#             per entry
#   rCount  - number of register words written back
# Appends to the global header_output and exec_output.
3180 def vdupGprInst(name, Name, opClass, types, rCount):
3181 global header_output, exec_output
3182 eWalkCode = '''
3183 RegVect destReg;
3184 for (unsigned i = 0; i < eCount; i++) {
3185 destReg.elements[i] = htog((Element)Op1);
3186 }
3187 '''
3188 for reg in range(rCount):
3189 eWalkCode += '''
3190 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3191 ''' % { "reg" : reg }
3192 iop = InstObjParams(name, Name,
3193 "RegRegOp",
3194 { "code": eWalkCode,
3195 "r_count": rCount,
3196 "predicate_test": predicateTest,
3197 "op_class": opClass }, [])
3198 header_output += NeonRegRegOpDeclare.subst(iop)
3199 exec_output += NeonEqualRegExecute.subst(iop)
3200 for type in types:
3201 substDict = { "targs" : type,
3202 "class_name" : Name }
3203 exec_output += NeonExecDeclare.subst(substDict)
3204 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3205 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3206
# One-register immediate forms, all processed as uint64_t elements:
#   vmov: destElem = imm
#   vorr: destElem |= imm
#   vmvn: destElem = ~imm   (rebinds the Python name vmvnCode)
#   vbic: destElem &= ~imm
# vorr and vbic read the old destination and pass an extra True argument to
# oneRegImmInst.
3207 vmovCode = 'destElem = imm;'
3208 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3209 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3210
3211 vorrCode = 'destElem |= imm;'
3212 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3213 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3214
3215 vmvnCode = 'destElem = ~imm;'
3216 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3217 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3218
3219 vbicCode = 'destElem &= ~imm;'
3220 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3221 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3222
# C++ body for NVqmovn (smallSignedTypes): narrows srcElem1 into destElem.
# If widening destElem back to BigElement does not reproduce the source, the
# result is clamped to mask(sizeof(Element) * 8 - 1), or its complement for
# a negative source, and fpscr.qc is set.
3223 vqmovnCode = '''
3224 FPSCR fpscr = (FPSCR) FpscrQc;
3225 destElem = srcElem1;
3226 if ((BigElement)destElem != srcElem1) {
3227 fpscr.qc = 1;
3228 destElem = mask(sizeof(Element) * 8 - 1);
3229 if (srcElem1 < 0)
3230 destElem = ~destElem;
3231 }
3232 FpscrQc = fpscr;
3233 '''
3234 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3235
# C++ body for NVqmovun (smallUnsignedTypes): narrows srcElem1 into
# destElem.  If widening destElem back to BigElement does not reproduce the
# source, the result is clamped to mask(sizeof(Element) * 8) and fpscr.qc is
# set.
3236 vqmovunCode = '''
3237 FPSCR fpscr = (FPSCR) FpscrQc;
3238 destElem = srcElem1;
3239 if ((BigElement)destElem != srcElem1) {
3240 fpscr.qc = 1;
3241 destElem = mask(sizeof(Element) * 8);
3242 }
3243 FpscrQc = fpscr;
3244 '''
3245 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3246 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3247
# C++ body for NVqmovuns (smallSignedTypes): narrows srcElem1 into destElem.
# A negative source, or one whose value differs from the low
# sizeof(Element) * 8 bits of destElem, sets fpscr.qc and clamps the result
# to mask(sizeof(Element) * 8), complemented when the source is negative.
3248 vqmovunsCode = '''
3249 FPSCR fpscr = (FPSCR) FpscrQc;
3250 destElem = srcElem1;
3251 if (srcElem1 < 0 ||
3252 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3253 fpscr.qc = 1;
3254 destElem = mask(sizeof(Element) * 8);
3255 if (srcElem1 < 0)
3256 destElem = ~destElem;
3257 }
3258 FpscrQc = fpscr;
3259 '''
3260 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3261 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3262
# Generates a three-register-plus-immediate instruction class `Name`.
# The emitted C++ loads rCount register words into srcReg1 and srcReg2
# (each load preceded by simdEnabledCheckCode), runs the supplied `op`
# snippet, and writes destReg back to FpDestP<n>_uw.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   types   - element types; one NeonExecDeclare instantiation is emitted
#             per entry
#   rCount  - number of register words per operand
#   op      - C++ snippet that fills destReg from srcReg1/srcReg2
# Appends to the global header_output and exec_output.
3263 def buildVext(name, Name, opClass, types, rCount, op):
3264 global header_output, exec_output
3265 eWalkCode = '''
3266 RegVect srcReg1, srcReg2, destReg;
3267 '''
3268 for reg in range(rCount):
3269 eWalkCode += simdEnabledCheckCode + '''
3270 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3271 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3272 ''' % { "reg" : reg }
3273 eWalkCode += op
3274 for reg in range(rCount):
3275 eWalkCode += '''
3276 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3277 ''' % { "reg" : reg }
3278 iop = InstObjParams(name, Name,
3279 "RegRegRegImmOp",
3280 { "code": eWalkCode,
3281 "r_count": rCount,
3282 "predicate_test": predicateTest,
3283 "op_class": opClass }, [])
3284 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3285 exec_output += NeonEqualRegExecute.subst(iop)
3286 for type in types:
3287 substDict = { "targs" : type,
3288 "class_name" : Name }
3289 exec_output += NeonExecDeclare.subst(substDict)
3290
# C++ body for NVextD/Q (uint8_t elements).  Destination element i is taken
# from srcReg1.elements[i + imm] while that index is below eCount, otherwise
# from srcReg2.elements[i + imm - eCount].  If the index is still >= eCount
# after the subtraction, fault is set to a new UndefinedInstruction (the
# constructor arguments depend on FullSystem).
3291 vextCode = '''
3292 for (unsigned i = 0; i < eCount; i++) {
3293 unsigned index = i + imm;
3294 if (index < eCount) {
3295 destReg.elements[i] = srcReg1.elements[index];
3296 } else {
3297 index -= eCount;
3298 if (index >= eCount) {
3299 if (FullSystem)
3300 fault = new UndefinedInstruction;
3301 else
3302 fault = new UndefinedInstruction(false, mnemonic);
3303 } else {
3304 destReg.elements[i] = srcReg2.elements[index];
3305 }
3306 }
3307 }
3308 '''
3309 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3310 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3311
# Generates a byte table-lookup instruction class `Name`.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   length  - multiplier for the table size: the first length * 2 words of
#             the 8-word table are loaded from FpOp1P<n>_uw, the remaining
#             words are zeroed, and indices below 8 * length are in range
#   isVtbl  - C++ boolean literal as a string ("true"/"false"); when true an
#             out-of-range index writes 0, when false the destination byte
#             is left unchanged
# In the emitted C++, each byte of srcReg2 is used as an index into the
# table and the selected byte is stored in the matching byte of destReg,
# which is preloaded from FpDestP0_uw/FpDestP1_uw.
# Appends to the global header_output, decoder_output and exec_output.
3312 def buildVtbxl(name, Name, opClass, length, isVtbl):
3313 global header_output, decoder_output, exec_output
3314 code = '''
3315 union
3316 {
3317 uint8_t bytes[32];
3318 FloatRegBits regs[8];
3319 } table;
3320
3321 union
3322 {
3323 uint8_t bytes[8];
3324 FloatRegBits regs[2];
3325 } destReg, srcReg2;
3326
3327 const unsigned length = %(length)d;
3328 const bool isVtbl = %(isVtbl)s;
3329
3330 srcReg2.regs[0] = htog(FpOp2P0_uw);
3331 srcReg2.regs[1] = htog(FpOp2P1_uw);
3332
3333 destReg.regs[0] = htog(FpDestP0_uw);
3334 destReg.regs[1] = htog(FpDestP1_uw);
3335 ''' % { "length" : length, "isVtbl" : isVtbl }
3336 for reg in range(8):
3337 if reg < length * 2:
3338 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3339 { "reg" : reg }
3340 else:
3341 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3342 code += '''
3343 for (unsigned i = 0; i < sizeof(destReg); i++) {
3344 uint8_t index = srcReg2.bytes[i];
3345 if (index < 8 * length) {
3346 destReg.bytes[i] = table.bytes[index];
3347 } else {
3348 if (isVtbl)
3349 destReg.bytes[i] = 0;
3350 // else destReg.bytes[i] unchanged
3351 }
3352 }
3353
3354 FpDestP0_uw = gtoh(destReg.regs[0]);
3355 FpDestP1_uw = gtoh(destReg.regs[1]);
3356 '''
3357 iop = InstObjParams(name, Name,
3358 "RegRegRegOp",
3359 { "code": code,
3360 "predicate_test": predicateTest,
3361 "op_class": opClass }, [])
3362 header_output += RegRegRegOpDeclare.subst(iop)
3363 decoder_output += RegRegRegOpConstructor.subst(iop)
3364 exec_output += PredOpExecute.subst(iop)
3365
# Table lengths 1-4 for both the zeroing (vtbl) and the preserving (vtbx)
# behaviour on out-of-range indices.
3366 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3367 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3368 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3369 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3370
3371 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3372 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3373 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3374 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3375}};
3140
# C++ body for NVuzpD/Q.  mid keeps a copy of the original srcReg1.  After
# both loops destReg holds the even-indexed elements of the old destReg
# followed by the even-indexed elements of the old srcReg1, and srcReg1
# holds the odd-indexed elements of the old destReg followed by the
# odd-indexed elements of the old srcReg1.
3141 vuzpCode = '''
3142 Element mid[eCount];
3143 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3144 for (unsigned i = 0; i < eCount / 2; i++) {
3145 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3146 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3147 destReg.elements[i] = destReg.elements[2 * i];
3148 }
3149 for (unsigned i = 0; i < eCount / 2; i++) {
3150 destReg.elements[eCount / 2 + i] = mid[2 * i];
3151 }
3152 '''
3153 twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3154 twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3155
# C++ body for NVzipD/Q.  mid keeps a copy of the original destReg.  After
# both loops destReg holds the lower halves of the old destReg and old
# srcReg1 interleaved element by element, and srcReg1 holds the upper halves
# interleaved the same way.  The second loop reads
# srcReg1.elements[eCount / 2 + i] before any iteration overwrites it, so no
# copy of srcReg1 is needed.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3170
# C++ body for NVmovn: a plain element copy, registered through
# twoRegNarrowMiscInst for smallUnsignedTypes.
3171 vmovnCode = 'destElem = srcElem1;'
3172 twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3173
# C++ body for NVdupD/Q: a plain element copy, registered through
# twoRegMiscScInst for smallUnsignedTypes with register counts 2 and 4.
3174 vdupCode = 'destElem = srcElem1;'
3175 twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3176 twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3177
# Generates an instruction class `Name` that fills every element of the
# destination vector with (Element)Op1 and writes the first rCount register
# words back to FpDestP<n>_uw.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   types   - element types; one NeonExecDeclare instantiation is emitted
#             per entry
#   rCount  - number of register words written back
# Appends to the global header_output and exec_output.
3178 def vdupGprInst(name, Name, opClass, types, rCount):
3179 global header_output, exec_output
3180 eWalkCode = '''
3181 RegVect destReg;
3182 for (unsigned i = 0; i < eCount; i++) {
3183 destReg.elements[i] = htog((Element)Op1);
3184 }
3185 '''
3186 for reg in range(rCount):
3187 eWalkCode += '''
3188 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3189 ''' % { "reg" : reg }
3190 iop = InstObjParams(name, Name,
3191 "RegRegOp",
3192 { "code": eWalkCode,
3193 "r_count": rCount,
3194 "predicate_test": predicateTest,
3195 "op_class": opClass }, [])
3196 header_output += NeonRegRegOpDeclare.subst(iop)
3197 exec_output += NeonEqualRegExecute.subst(iop)
3198 for type in types:
3199 substDict = { "targs" : type,
3200 "class_name" : Name }
3201 exec_output += NeonExecDeclare.subst(substDict)
3202 vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3203 vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3204
# One-register immediate forms, all processed as uint64_t elements:
#   vmov: destElem = imm
#   vorr: destElem |= imm
#   vmvn: destElem = ~imm
#   vbic: destElem &= ~imm
# vorr and vbic read the old destination and pass an extra True argument to
# oneRegImmInst.
3205 vmovCode = 'destElem = imm;'
3206 oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3207 oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3208
3209 vorrCode = 'destElem |= imm;'
3210 oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3211 oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3212
3213 vmvnCode = 'destElem = ~imm;'
3214 oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3215 oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3216
3217 vbicCode = 'destElem &= ~imm;'
3218 oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3219 oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3220
# C++ body for NVqmovn (smallSignedTypes): narrows srcElem1 into destElem.
# If widening destElem back to BigElement does not reproduce the source, the
# result is clamped to mask(sizeof(Element) * 8 - 1), or its complement for
# a negative source, and fpscr.qc is set.
3221 vqmovnCode = '''
3222 FPSCR fpscr = (FPSCR) FpscrQc;
3223 destElem = srcElem1;
3224 if ((BigElement)destElem != srcElem1) {
3225 fpscr.qc = 1;
3226 destElem = mask(sizeof(Element) * 8 - 1);
3227 if (srcElem1 < 0)
3228 destElem = ~destElem;
3229 }
3230 FpscrQc = fpscr;
3231 '''
3232 twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3233
# C++ body for NVqmovun (smallUnsignedTypes): narrows srcElem1 into
# destElem.  If widening destElem back to BigElement does not reproduce the
# source, the result is clamped to mask(sizeof(Element) * 8) and fpscr.qc is
# set.
3234 vqmovunCode = '''
3235 FPSCR fpscr = (FPSCR) FpscrQc;
3236 destElem = srcElem1;
3237 if ((BigElement)destElem != srcElem1) {
3238 fpscr.qc = 1;
3239 destElem = mask(sizeof(Element) * 8);
3240 }
3241 FpscrQc = fpscr;
3242 '''
3243 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3244 "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3245
# C++ body for NVqmovuns (smallSignedTypes): narrows srcElem1 into destElem.
# A negative source, or one whose value differs from the low
# sizeof(Element) * 8 bits of destElem, sets fpscr.qc and clamps the result
# to mask(sizeof(Element) * 8), complemented when the source is negative.
3246 vqmovunsCode = '''
3247 FPSCR fpscr = (FPSCR) FpscrQc;
3248 destElem = srcElem1;
3249 if (srcElem1 < 0 ||
3250 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3251 fpscr.qc = 1;
3252 destElem = mask(sizeof(Element) * 8);
3253 if (srcElem1 < 0)
3254 destElem = ~destElem;
3255 }
3256 FpscrQc = fpscr;
3257 '''
3258 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3259 "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3260
# Generates a three-register-plus-immediate instruction class `Name`.
# The emitted C++ loads rCount register words into srcReg1 and srcReg2
# (each load preceded by simdEnabledCheckCode), runs the supplied `op`
# snippet, and writes destReg back to FpDestP<n>_uw.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   types   - element types; one NeonExecDeclare instantiation is emitted
#             per entry
#   rCount  - number of register words per operand
#   op      - C++ snippet that fills destReg from srcReg1/srcReg2
# Appends to the global header_output and exec_output.
3261 def buildVext(name, Name, opClass, types, rCount, op):
3262 global header_output, exec_output
3263 eWalkCode = '''
3264 RegVect srcReg1, srcReg2, destReg;
3265 '''
3266 for reg in range(rCount):
3267 eWalkCode += simdEnabledCheckCode + '''
3268 srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3269 srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3270 ''' % { "reg" : reg }
3271 eWalkCode += op
3272 for reg in range(rCount):
3273 eWalkCode += '''
3274 FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3275 ''' % { "reg" : reg }
3276 iop = InstObjParams(name, Name,
3277 "RegRegRegImmOp",
3278 { "code": eWalkCode,
3279 "r_count": rCount,
3280 "predicate_test": predicateTest,
3281 "op_class": opClass }, [])
3282 header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3283 exec_output += NeonEqualRegExecute.subst(iop)
3284 for type in types:
3285 substDict = { "targs" : type,
3286 "class_name" : Name }
3287 exec_output += NeonExecDeclare.subst(substDict)
3288
# C++ body for NVextD/Q (uint8_t elements).  Destination element i is taken
# from srcReg1.elements[i + imm] while that index is below eCount, otherwise
# from srcReg2.elements[i + imm - eCount].  If the index is still >= eCount
# after the subtraction, fault is set to a new UndefinedInstruction (the
# constructor arguments depend on FullSystem).
3289 vextCode = '''
3290 for (unsigned i = 0; i < eCount; i++) {
3291 unsigned index = i + imm;
3292 if (index < eCount) {
3293 destReg.elements[i] = srcReg1.elements[index];
3294 } else {
3295 index -= eCount;
3296 if (index >= eCount) {
3297 if (FullSystem)
3298 fault = new UndefinedInstruction;
3299 else
3300 fault = new UndefinedInstruction(false, mnemonic);
3301 } else {
3302 destReg.elements[i] = srcReg2.elements[index];
3303 }
3304 }
3305 }
3306 '''
3307 buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3308 buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3309
# Generates a byte table-lookup instruction class `Name`.
#   name    - mnemonic passed to InstObjParams
#   Name    - generated class name
#   opClass - value substituted as "op_class"
#   length  - multiplier for the table size: the first length * 2 words of
#             the 8-word table are loaded from FpOp1P<n>_uw, the remaining
#             words are zeroed, and indices below 8 * length are in range
#   isVtbl  - C++ boolean literal as a string ("true"/"false"); when true an
#             out-of-range index writes 0, when false the destination byte
#             is left unchanged
# In the emitted C++, each byte of srcReg2 is used as an index into the
# table and the selected byte is stored in the matching byte of destReg,
# which is preloaded from FpDestP0_uw/FpDestP1_uw.
# Appends to the global header_output, decoder_output and exec_output.
3310 def buildVtbxl(name, Name, opClass, length, isVtbl):
3311 global header_output, decoder_output, exec_output
3312 code = '''
3313 union
3314 {
3315 uint8_t bytes[32];
3316 FloatRegBits regs[8];
3317 } table;
3318
3319 union
3320 {
3321 uint8_t bytes[8];
3322 FloatRegBits regs[2];
3323 } destReg, srcReg2;
3324
3325 const unsigned length = %(length)d;
3326 const bool isVtbl = %(isVtbl)s;
3327
3328 srcReg2.regs[0] = htog(FpOp2P0_uw);
3329 srcReg2.regs[1] = htog(FpOp2P1_uw);
3330
3331 destReg.regs[0] = htog(FpDestP0_uw);
3332 destReg.regs[1] = htog(FpDestP1_uw);
3333 ''' % { "length" : length, "isVtbl" : isVtbl }
3334 for reg in range(8):
3335 if reg < length * 2:
3336 code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3337 { "reg" : reg }
3338 else:
3339 code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3340 code += '''
3341 for (unsigned i = 0; i < sizeof(destReg); i++) {
3342 uint8_t index = srcReg2.bytes[i];
3343 if (index < 8 * length) {
3344 destReg.bytes[i] = table.bytes[index];
3345 } else {
3346 if (isVtbl)
3347 destReg.bytes[i] = 0;
3348 // else destReg.bytes[i] unchanged
3349 }
3350 }
3351
3352 FpDestP0_uw = gtoh(destReg.regs[0]);
3353 FpDestP1_uw = gtoh(destReg.regs[1]);
3354 '''
3355 iop = InstObjParams(name, Name,
3356 "RegRegRegOp",
3357 { "code": code,
3358 "predicate_test": predicateTest,
3359 "op_class": opClass }, [])
3360 header_output += RegRegRegOpDeclare.subst(iop)
3361 decoder_output += RegRegRegOpConstructor.subst(iop)
3362 exec_output += PredOpExecute.subst(iop)
3363
# Table lengths 1-4 for both the zeroing (vtbl) and the preserving (vtbx)
# behaviour on out-of-range indices.
3364 buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3365 buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3366 buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3367 buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3368
3369 buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3370 buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3371 buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3372 buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3373}};