1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
623
# Accumulators that the generator functions below append their output to.
header_output = ""
exec_output = ""

# Names of the C++ element types the instruction templates are
# instantiated with. The "small" groups leave out the 64 bit type.
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
smallUnsignedTypes = tuple("u" + signedName
                           for signedName in smallSignedTypes)
signedTypes = smallSignedTypes + ("int64_t",)
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
633
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                      readDest=False, pairwise=False):
    # Generate an instruction whose two sources and destination each span
    # rCount registers and share one element type.
    #
    # op is C++ text that computes destElem from srcElem1 and srcElem2.
    # readDest additionally loads the old destination value into destElem
    # before op runs. pairwise feeds op with adjacent element pairs taken
    # from srcReg1 first and then from srcReg2, instead of same-index
    # elements. The class declaration is appended to header_output, and
    # the execute code plus one NeonExecDeclare per entry of types to
    # exec_output.
    global header_output, exec_output

    # Per-register code that copies operands in and the result back out.
    loadRegs = ''
    storeRegs = ''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        loadRegs += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            loadRegs += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst
        storeRegs += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % regSubst

    # Only the expressions that fetch the two source elements differ
    # between the pairwise and the element-by-element forms.
    if pairwise:
        firstSrc = '''2 * i < eCount ?
                                  srcReg1.elements[2 * i] :
                                  srcReg2.elements[2 * i - eCount]'''
        secondSrc = '''2 * i < eCount ?
                                  srcReg1.elements[2 * i + 1] :
                                  srcReg2.elements[2 * i + 1 - eCount]'''
    else:
        firstSrc = 'srcReg1.elements[i]'
        secondSrc = 'srcReg2.elements[i]'

    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    elementLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(%(firstSrc)s);
        Element srcElem2 = gtoh(%(secondSrc)s);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "firstSrc" : firstSrc, "secondSrc" : secondSrc,
            "op" : op, "readDest" : readDestCode }

    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    ''' + loadRegs + elementLoop + storeRegs

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
694
def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    # Generate an instruction that works on whole FloatReg values: two
    # sources and one destination of rCount registers each.
    #
    # op is C++ text that computes destReg from srcReg1 and srcReg2.
    # readDest preloads the old destination. pairwise feeds op with
    # adjacent register pairs taken from the first source and then the
    # second. toInt makes the destination a RegVect of FloatRegBits
    # instead of an array of FloatReg. Output goes to header_output and
    # exec_output like the other generators here.
    global header_output, exec_output

    if toInt:
        destDecl = 'RegVect destRegs;\n'
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        loadDest = '''
        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
        '''
        storeDest = '''
        FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
        '''
    else:
        destDecl = 'FloatVect destRegs;\n'
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
        loadDest = '''
        destRegs[%(reg)d] = FpDestP%(reg)d;
        '''
        storeDest = '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        '''

    # Per-register code that copies operands in and the result back out.
    loadRegs = ''
    storeRegs = ''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        loadRegs += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regSubst
        if readDest:
            loadRegs += loadDest % regSubst
        storeRegs += storeDest % regSubst

    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'

    # Only the source fetch differs between the two loop forms.
    if pairwise:
        fetchSrcs = '''FloatReg srcReg1 = (2 * r < rCount) ?
            srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
        FloatReg srcReg2 = (2 * r < rCount) ?
            srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];'''
    else:
        fetchSrcs = '''FloatReg srcReg1 = srcRegs1[r];
        FloatReg srcReg2 = srcRegs2[r];'''

    registerLoop = '''
    for (unsigned r = 0; r < rCount; r++) {
        %(fetchSrcs)s
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "fetchSrcs" : fetchSrcs,
            "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }

    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    ''' + destDecl + loadRegs + registerLoop + storeRegs

    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
779
def threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    # Generate a three-operand instruction whose operands may differ in
    # width.
    #
    # Each of bigSrc1, bigSrc2 and bigDest selects, for its operand,
    # either four registers with the Big-prefixed vector/element types or
    # two registers with the plain ones. op is C++ text that computes
    # destElem from srcElem1 and srcElem2; readDest preloads destElem
    # with the old destination value. Output goes to header_output and
    # exec_output.
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
    %sRegVect srcReg1;
    %sRegVect srcReg2;
    %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 is declared with the second source's own element type so
    # that it matches the vector it is read from; only the mixed-width
    # case (bigSrc1 != bigSrc2) is affected.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
843
def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
    # Both sources are "big", the destination is not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
847
def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Only the destination is "big"; both sources are not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
851
def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
    # The first source and the destination are "big"; the second source
    # is not.
    threeUnequalRegInst(name, Name, opClass, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate an instruction that combines every element of the first
    # source with the single element of the second source selected by
    # imm. Sources and destination span rCount registers each.
    #
    # op is C++ text that computes destElem from srcElem1 and srcElem2;
    # readDest preloads destElem with the old destination value. Output
    # goes to header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The index is rejected when it falls outside [0, eCount). The two
    # range tests are OR'ed: no value can fail both at once, so AND-ing
    # them would never fault and srcReg2.elements[imm] could be read out
    # of bounds.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
907
def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
    # Generate an instruction that combines every element of the first
    # source with the element of the second source selected by imm and
    # writes BigElement results. The sources span two registers each and
    # the destination four.
    #
    # op is C++ text that computes destElem from srcElem1 and srcElem2;
    # readDest preloads destElem with the old destination value. Output
    # goes to header_output and exec_output.
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(2 * rCount):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # The index is rejected when it falls outside [0, eCount). The two
    # range tests are OR'ed: no value can fail both at once, so AND-ing
    # them would never fault and srcReg2.elements[imm] could be read out
    # of bounds.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[imm]);
            BigElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
962
def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
    # Generate an instruction on whole FloatReg values that combines
    # every register of the first source with the register of the second
    # source selected by imm. Sources and destination span rCount
    # registers each.
    #
    # op is C++ text that computes destReg from srcReg1 and srcReg2;
    # readDest preloads destReg with the old destination value. Output
    # goes to header_output and exec_output.
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            eWalkCode += '''
            destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[i];'
    # The index is rejected when it falls outside [0, eCount). The two
    # range tests are OR'ed: no value can fail both at once, so AND-ing
    # them would never fault and srcRegs2[imm] could be read out of
    # bounds.
    eWalkCode += '''
    if (imm < 0 || imm >= eCount) {
#if FULL_SYSTEM
        fault = new UndefinedInstruction;
#else
        fault = new UndefinedInstruction(false, mnemonic);
#endif
    } else {
        for (unsigned i = 0; i < rCount; i++) {
            FloatReg srcReg1 = srcRegs1[i];
            FloatReg srcReg2 = srcRegs2[imm];
            FloatReg destReg;
            %(readDest)s
            %(op)s
            destRegs[i] = destReg;
        }
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1015
def twoRegShiftInst(name, Name, opClass, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    # Generate a one-source instruction with an immediate; source and
    # destination span rCount registers each.
    #
    # op is C++ text that computes the destination value from the source
    # value. By default both are typed as Element (srcElem1, destElem).
    # fromInt reads the source as a whole FloatRegBits register (srcReg1)
    # and toInt writes the destination as a whole FloatRegBits register
    # (destReg). readDest loads the old destination registers first.
    # Output goes to header_output and exec_output.
    global header_output, exec_output

    # Per-register code that copies operands in and the result back out.
    loadRegs = ''
    storeRegs = ''
    for regIdx in range(rCount):
        regSubst = { "reg" : regIdx }
        loadRegs += '''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        ''' % regSubst
        if readDest:
            loadRegs += '''
            destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % regSubst
        storeRegs += '''
        FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % regSubst

    # NOTE(review): the toInt form of the destination read is emitted
    # whether or not readDest is set, i.e. the toInt test is treated as
    # independent of the readDest test -- confirm against the intended
    # nesting of the original conditions.
    if toInt:
        declDest = 'FloatRegBits destReg;'
        readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        writeDestCode = 'destRegs.regs[i] = htog(destReg);'
    else:
        declDest = 'Element destElem;'
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'

    if fromInt:
        readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    elementLoop = '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : readOpCode,
            "declDest" : declDest,
            "readDest" : readDestCode,
            "op" : op,
            "writeDest" : writeDestCode }

    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    ''' + loadRegs + elementLoop + storeRegs

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1072
def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON narrowing instruction that takes an immediate.

    The source is walked as a BigRegVect filled from four FpOp1P chunks and
    the destination as a RegVect written back to two FpDestP chunks.  op runs
    once per element with srcElem1 (BigElement) in scope and is expected to
    assign destElem (Element).  When readDest is true the two destination
    chunks are loaded first and destElem is preset from them.
    """
    global header_output, exec_output

    if readDest:
        destLoads = ["\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx)
                     for idx in range(2)]
        presetDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destLoads = []
        presetDest = ''

    fragments = [simdEnabledCheckCode,
                 "\nBigRegVect srcReg1;\nRegVect destReg;\n"]
    fragments += ["\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (idx, idx)
                  for idx in range(4)]
    fragments += destLoads
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "BigElement srcElem1 = gtoh(srcReg1.elements[i]);",
        "Element destElem;",
        presetDest,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(2)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1116
def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON lengthening instruction that takes an immediate.

    The source is walked as a RegVect filled from two FpOp1P chunks and the
    destination as a BigRegVect written back to four FpDestP chunks.  op runs
    once per element with srcElem1 (Element) in scope and is expected to
    assign destElem (BigElement).  When readDest is true the four destination
    chunks are loaded first and destElem is preset from them.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode,
                 "\nRegVect srcReg1;\nBigRegVect destReg;\n"]
    for reg in range(2):
        fragments.append(
            "\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (reg, reg))

    readDestCode = ''
    if readDest:
        for reg in range(4):
            fragments.append(
                "\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (reg, reg))
        # The preset has to target the per-element temporary.  Assigning to
        # destReg (the whole BigRegVect) would not compile in the generated
        # code.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "Element srcElem1 = gtoh(srcReg1.elements[i]);",
        "BigElement destElem;",
        readDestCode,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    for reg in range(4):
        fragments.append(
            "\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (reg, reg))
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1160
def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register miscellaneous instruction (no immediate).

    Source and destination are both RegVects spanning rCount chunks.  op runs
    once per element with srcElem1 in scope and is expected to assign
    destElem.  The result is stored at index j, which starts out equal to the
    loop index i (presumably so op can redirect the store -- confirm against
    the callers).  When readDest is true the destination chunks are loaded
    first and destElem is preset from element i.
    """
    global header_output, exec_output

    presetDest = ''
    if readDest:
        presetDest = 'destElem = gtoh(destReg.elements[i]);'

    fragments = [simdEnabledCheckCode, "\nRegVect srcReg1, destReg;\n"]
    for idx in range(rCount):
        fragments.append(
            "\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (idx, idx))
        if readDest:
            fragments.append(
                "\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx))
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "unsigned j = i;",
        "Element srcElem1 = gtoh(srcReg1.elements[i]);",
        "Element destElem;",
        presetDest,
        op,
        "destReg.elements[j] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(rCount)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1203
def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON two-register instruction whose source is a single lane.

    Source and destination are both RegVects spanning rCount chunks.  For
    every destination element the generated loop reads the same source
    element, srcReg1.elements[imm], into srcElem1, runs op, and stores
    destElem at index i.  When readDest is true the destination chunks are
    loaded first and destElem is preset from element i.
    """
    global header_output, exec_output

    presetDest = ''
    if readDest:
        presetDest = 'destElem = gtoh(destReg.elements[i]);'

    fragments = [simdEnabledCheckCode, "\nRegVect srcReg1, destReg;\n"]
    for idx in range(rCount):
        fragments.append(
            "\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (idx, idx))
        if readDest:
            fragments.append(
                "\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx))
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "Element srcElem1 = gtoh(srcReg1.elements[imm]);",
        "Element destElem;",
        presetDest,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(rCount)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1245
def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction that rearranges two whole register vectors.

    Unlike the per-element helpers, op is inserted verbatim with no loop
    around it: it sees the complete srcReg1 and destReg vectors.  Both
    vectors are always loaded from the register file before op and both are
    written back afterwards, so op may modify either one.

    readDest is accepted for signature compatibility with the sibling
    helpers but has no effect, because the destination is loaded
    unconditionally.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode, "\nRegVect srcReg1, destReg;\n"]
    for reg in range(rCount):
        fragments.append(
            "\nsrcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);"
            "\ndestReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);\n"
            % { "reg" : reg })
    fragments.append(op)
    for reg in range(rCount):
        # The source operand is written back as well as the destination.
        fragments.append(
            "\nFpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);"
            "\nFpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);\n"
            % { "reg" : reg })
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1280
def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit a NEON two-register instruction that works on whole FloatRegs.

    The generated code walks registers (loop variable r), not packed
    elements: each iteration copies srcRegs1[r] into srcReg1, runs op, and
    stores destReg.

    toInt    -- the destination is a RegVect of raw bits (destReg is a
                FloatRegBits stored through destRegs.regs and written back
                via the _uw operand view) instead of a FloatVect.
    readDest -- load the destination registers first and preset destReg from
                register r before op runs.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode,
                 "\ntypedef FloatReg FloatVect[rCount];\nFloatVect srcRegs1;\n"]
    if toInt:
        fragments.append('RegVect destRegs;\n')
        loadDest = "\ndestRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;\n"
        storeDest = "\nFpDestP%(reg)d_uw = destRegs.regs[%(reg)d];\n"
        destType = 'FloatRegBits'
        destSlot = 'destRegs.regs[r]'
    else:
        fragments.append('FloatVect destRegs;\n')
        loadDest = "\ndestRegs[%(reg)d] = FpDestP%(reg)d;\n"
        storeDest = "\nFpDestP%(reg)d = destRegs[%(reg)d];\n"
        destType = 'FloatReg'
        destSlot = 'destRegs[r]'

    for reg in range(rCount):
        fragments.append("\nsrcRegs1[%(reg)d] = FpOp1P%(reg)d;\n"
                         % { "reg" : reg })
        if readDest:
            fragments.append(loadDest % { "reg" : reg })

    # The loop below iterates with r, so the preset must index with r as
    # well, and must go through .regs when the destination is a RegVect.
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = %s;' % destSlot
    writeDest = '%s = destReg;' % destSlot

    fragments.append("\n".join([
        "",
        "for (unsigned r = 0; r < rCount; r++) {",
        "FloatReg srcReg1 = srcRegs1[r];",
        "%s destReg;" % destType,
        readDestCode,
        op,
        writeDest,
        "}",
        "",
    ]))
    for reg in range(rCount):
        fragments.append(storeDest % { "reg" : reg })
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1346
def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON instruction that folds adjacent element pairs.

    The source RegVect is filled from rCount chunks.  The generated loop runs
    eCount / 2 times; iteration i exposes source elements 2*i and 2*i + 1 as
    srcElem1 and srcElem2, runs op, and stores the BigElement destElem at
    index i of a BigRegVect, which is written back to rCount chunks.  When
    readDest is true the destination chunks are loaded first and destElem is
    preset from element i.
    """
    global header_output, exec_output

    presetDest = ''
    if readDest:
        presetDest = 'destElem = gtoh(destReg.elements[i]);'

    fragments = [simdEnabledCheckCode,
                 "\nRegVect srcRegs;\nBigRegVect destReg;\n"]
    for idx in range(rCount):
        fragments.append(
            "\nsrcRegs.regs[%d] = htog(FpOp1P%d_uw);\n" % (idx, idx))
        if readDest:
            fragments.append(
                "\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx))
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount / 2; i++) {",
        "Element srcElem1 = gtoh(srcRegs.elements[2 * i]);",
        "Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);",
        "BigElement destElem;",
        presetDest,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(rCount)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1390
def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON narrowing two-register instruction (no immediate).

    The source is walked as a BigRegVect filled from four FpOp1P chunks and
    the destination as a RegVect written back to two FpDestP chunks.  op runs
    once per element with srcElem1 (BigElement) in scope and is expected to
    assign destElem (Element).  When readDest is true the two destination
    chunks are loaded first and destElem is preset from them.
    """
    global header_output, exec_output

    if readDest:
        destLoads = ["\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx)
                     for idx in range(2)]
        presetDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destLoads = []
        presetDest = ''

    fragments = [simdEnabledCheckCode,
                 "\nBigRegVect srcReg1;\nRegVect destReg;\n"]
    fragments += ["\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (idx, idx)
                  for idx in range(4)]
    fragments += destLoads
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "BigElement srcElem1 = gtoh(srcReg1.elements[i]);",
        "Element destElem;",
        presetDest,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(2)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1434
def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
    """Emit a NEON one-register instruction that takes an immediate.

    There is no source vector: op runs once per element and is expected to
    assign destElem, which is stored into a RegVect that is written back to
    rCount FpDestP chunks.  When readDest is true the destination chunks are
    loaded first and destElem is preset from element i.
    """
    global header_output, exec_output

    if readDest:
        destLoads = ["\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (idx, idx)
                     for idx in range(rCount)]
        presetDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destLoads = []
        presetDest = ''

    fragments = [simdEnabledCheckCode, "\nRegVect destReg;\n"]
    fragments += destLoads
    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "Element destElem;",
        presetDest,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    fragments += ["\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (idx, idx)
                  for idx in range(rCount)]
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1472
def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
    """Emit a NEON lengthening two-register instruction (no immediate).

    The source is walked as a RegVect filled from two FpOp1P chunks and the
    destination as a BigRegVect written back to four FpDestP chunks.  op runs
    once per element with srcElem1 (Element) in scope and is expected to
    assign destElem (BigElement).  When readDest is true the four destination
    chunks are loaded first and destElem is preset from them.
    """
    global header_output, exec_output

    fragments = [simdEnabledCheckCode,
                 "\nRegVect srcReg1;\nBigRegVect destReg;\n"]
    for reg in range(2):
        fragments.append(
            "\nsrcReg1.regs[%d] = htog(FpOp1P%d_uw);\n" % (reg, reg))

    readDestCode = ''
    if readDest:
        for reg in range(4):
            fragments.append(
                "\ndestReg.regs[%d] = htog(FpDestP%d_uw);\n" % (reg, reg))
        # The preset has to target the per-element temporary.  Assigning to
        # destReg (the whole BigRegVect) would not compile in the generated
        # code.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'

    fragments.append("\n".join([
        "",
        "for (unsigned i = 0; i < eCount; i++) {",
        "Element srcElem1 = gtoh(srcReg1.elements[i]);",
        "BigElement destElem;",
        readDestCode,
        op,
        "destReg.elements[i] = htog(destElem);",
        "}",
        "",
    ]))
    for reg in range(4):
        fragments.append(
            "\nFpDestP%d_uw = gtoh(destReg.regs[%d]);\n" % (reg, reg))
    eWalkCode = "".join(fragments)

    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1516
# vhadd: halving add.  Each operand has its low bit masked off and is halved
# on its own, the halves are added, and the carry produced by adding the two
# discarded low bits is added back in, so no wider intermediate is needed.
# Registered for all element types as a D class (count argument 2) and a Q
# class (count argument 4).
vhaddCode = '''
Element carryBit =
(((unsigned)srcElem1 & 0x1) +
((unsigned)srcElem2 & 0x1)) >> 1;
// Use division instead of a shift to ensure the sign extension works
// right. The compiler will figure out if it can be a shift. Mask the
// inputs so they get truncated correctly.
destElem = (((srcElem1 & ~(Element)1) / 2) +
((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1529
# vrhadd: rounding halving add.  Same scheme as vhaddCode, except that 1 is
# added to the sum of the two low bits before it is shifted down, so the
# carry also accounts for rounding.
vrhaddCode = '''
Element carryBit =
(((unsigned)srcElem1 & 0x1) +
((unsigned)srcElem2 & 0x1) + 1) >> 1;
// Use division instead of a shift to ensure the sign extension works
// right. The compiler will figure out if it can be a shift. Mask the
// inputs so they get truncated correctly.
destElem = (((srcElem1 & ~(Element)1) / 2) +
((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1542
# vhsub: halving subtract.  The operands are masked and halved separately and
# then subtracted; barrowBit (i.e. the borrow) is derived from the difference
# of the two discarded low bits and subtracted from the result.
vhsubCode = '''
Element barrowBit =
(((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
// Use division instead of a shift to ensure the sign extension works
// right. The compiler will figure out if it can be a shift. Mask the
// inputs so they get truncated correctly.
destElem = (((srcElem1 & ~(Element)1) / 2) -
((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1554
# Plain bitwise operations.  Signedness is irrelevant here, so each one is
# registered for unsignedTypes only, as a D class (2) and a Q class (4).

# vand: bitwise AND.
vandCode = '''
destElem = srcElem1 & srcElem2;
'''
threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

# vbic: bit clear, srcElem1 AND NOT srcElem2.
vbicCode = '''
destElem = srcElem1 & ~srcElem2;
'''
threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

# vorr: bitwise OR.
vorrCode = '''
destElem = srcElem1 | srcElem2;
'''
threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

# vmov reuses the OR template under its own mnemonic, class names and op
# class.  NOTE(review): this presumably relies on the decoder selecting these
# classes only when both source registers are the same -- confirm.
threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

# vorn: srcElem1 OR NOT srcElem2.
vornCode = '''
destElem = srcElem1 | ~srcElem2;
'''
threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

# veor: bitwise exclusive OR.
veorCode = '''
destElem = srcElem1 ^ srcElem2;
'''
threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1587
# Bitwise select/insert family.  All three templates read the old destElem,
# so they pass a trailing True (presumably threeEqualRegInst's readDest
# argument -- its definition is outside this part of the file).

# vbif: keep destElem bits where srcElem2 is 1, take srcElem1 bits where
# srcElem2 is 0.
vbifCode = '''
destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: take srcElem1 bits where srcElem2 is 1, keep destElem bits elsewhere.
vbitCode = '''
destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destElem is the selector -- srcElem1 bits where it is 1,
# srcElem2 bits where it is 0.
vbslCode = '''
destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1603
# vmax: per-element maximum.  srcElem2 is chosen when the operands compare
# equal.
vmaxCode = '''
destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

# vmin: per-element minimum.  srcElem2 is chosen when the operands compare
# equal.
vminCode = '''
destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1615
# vadd: plain per-element addition with no saturation.  Registered for
# unsignedTypes only.  The class names carry an "N" prefix (NVaddD / NVaddQ).
vaddCode = '''
destElem = srcElem1 + srcElem2;
'''
threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
# vpadd: pairwise add, reusing the plain add template with pairwise=True.
# Only the D form is registered, and only for smallUnsignedTypes.  This span
# previously held two interleaved revisions of the registration (a second
# opening of the D call and a leftover Q call), which did not parse; the
# newer revision is the one kept.
threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                  2, vaddCode, pairwise=True)
# vaddl / vaddw: both operands are cast to BigElement before the add, so the
# sum is formed at the wider type.  The same template serves the long and
# the wide form.
vaddlwCode = '''
destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: add at BigElement width, then shift right by the bit width of
# Element to keep what lies above the low sizeof(Element) * 8 bits.
vaddhnCode = '''
destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
(sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but 1 << (bit width of Element - 1) is added before
# the shift so the discarded low part rounds instead of truncating.
vraddhnCode = '''
destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
(sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1640
# vsub: plain per-element subtraction with no saturation.
vsubCode = '''
destElem = srcElem1 - srcElem2;
'''
threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both operands are cast to BigElement before subtracting.
# The same template serves the long and the wide form.
vsublwCode = '''
destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1651
# vqadd (unsigned): saturating add.  Wrap-around is detected by the sum being
# smaller than either operand; the result is then forced to all ones and the
# qc bit is set in the FPSCR value read from and written back to FpscrQc.
vqaddUCode = '''
destElem = srcElem1 + srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (destElem < srcElem1 || destElem < srcElem2) {
destElem = (Element)(-1);
fpscr.qc = 1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: subtract at BigElement width, then shift right by the bit width of
# Element to keep what lies above the low sizeof(Element) * 8 bits.
vsubhnCode = '''
destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
(sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but 1 << (bit width of Element - 1) is added before
# the shift so the discarded low part rounds instead of truncating.
vrsubhnCode = '''
destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
(sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1674
# vqadd (signed): saturating add.  Overflow is flagged when both operands
# have the same sign and the sum's sign differs from it.  The result is then
# set to the value with only the top bit set, minus 1 when the wrapped sum
# came out negative, and qc is set.
vqaddSCode = '''
destElem = srcElem1 + srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
bool negDest = (destElem < 0);
bool negSrc1 = (srcElem1 < 0);
bool negSrc2 = (srcElem2 < 0);
if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
destElem = (Element)1 << (sizeof(Element) * 8 - 1);
if (negDest)
destElem -= 1;
fpscr.qc = 1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1691
# vqsub (unsigned): saturating subtract.  A difference larger than srcElem1
# means the subtraction wrapped; the result is then clamped to 0 and qc is
# set.
vqsubUCode = '''
destElem = srcElem1 - srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (destElem > srcElem1) {
destElem = 0;
fpscr.qc = 1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1703
# vqsub (signed): saturating subtract.  Overflow is flagged when the operands
# have opposite signs (negSrc1 == posSrc2) and the difference's sign differs
# from srcElem1's.  The result is then set to the value with only the top bit
# set, minus 1 when the wrapped difference came out negative, and qc is set.
vqsubSCode = '''
destElem = srcElem1 - srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
bool negDest = (destElem < 0);
bool negSrc1 = (srcElem1 < 0);
bool posSrc2 = (srcElem2 >= 0);
if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
destElem = (Element)1 << (sizeof(Element) * 8 - 1);
if (negDest)
destElem -= 1;
fpscr.qc = 1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1720
# Element-wise comparisons.  Each produces an all-ones element when the
# comparison holds and 0 otherwise.

# vcgt: greater than.
vcgtCode = '''
destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

# vcge: greater than or equal.
vcgeCode = '''
destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

# vceq: equal.  Registered for unsignedTypes only.
vceqCode = '''
destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1738
# vshl: shift by a per-element register amount.  The amount is the low byte
# of srcElem2 read as signed: negative values shift right by the magnitude,
# non-negative values shift left.  Shifts of the full element width or more
# yield 0, after which the right-shift path ORs in sign bits when srcElem1 is
# negative (as reported by ltz()) and the shifted value is not.
vshlCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
if (ltz(srcElem1) && !ltz(destElem)) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
} else {
if (shiftAmt >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem1 << shiftAmt;
}
}
'''
threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1764
# vrshl: rounding shift by a per-element register amount.  Same structure as
# vshlCode, but on the right-shift path a rounding bit is added to the
# result: the last bit shifted out (bit shiftAmt - 1 of srcElem1) when the
# amount is at most the element width, or 1 when the amount exceeds the
# element width and srcElem1 is negative.  A zero amount copies srcElem1.
vrshlCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
Element rBit = 0;
if (shiftAmt <= sizeof(Element) * 8)
rBit = bits(srcElem1, shiftAmt - 1);
if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
rBit = 1;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
if (ltz(srcElem1) && !ltz(destElem)) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
destElem += rBit;
} else if (shiftAmt > 0) {
if (shiftAmt >= sizeof(Element) * 8) {
destElem = 0;
} else {
destElem = srcElem1 << shiftAmt;
}
} else {
destElem = srcElem1;
}
'''
threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1798
# vqshl (unsigned): saturating shift by a per-element register amount.
# Negative amounts shift right with no saturation.  Positive amounts shift
# left, and if any set bit would be shifted out (a non-zero source with an
# amount of at least the element width, or non-zero top shiftAmt bits) the
# result becomes mask(sizeof(Element) * 8) and qc is set.  A zero amount
# copies srcElem1.
vqshlUCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
} else if (shiftAmt > 0) {
if (shiftAmt >= sizeof(Element) * 8) {
if (srcElem1 != 0) {
destElem = mask(sizeof(Element) * 8);
fpscr.qc = 1;
} else {
destElem = 0;
}
} else {
if (bits(srcElem1, sizeof(Element) * 8 - 1,
sizeof(Element) * 8 - shiftAmt)) {
destElem = mask(sizeof(Element) * 8);
fpscr.qc = 1;
} else {
destElem = srcElem1 << shiftAmt;
}
}
} else {
destElem = srcElem1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1834
# vqshl (signed): saturating shift by a per-element register amount.
# Negative amounts shift right, with sign bits ORed in when the source is
# negative and the shifted value is not.  Positive amounts shift left unless
# that would lose information: the top shiftAmt + 1 bits must all equal the
# sign (all ones for a negative source, all zeros otherwise), and an amount
# of at least the element width saturates any non-zero source.  On saturation
# qc is set and the result is mask(sizeof(Element) * 8 - 1), complemented
# when the source is negative.  A zero amount copies srcElem1.
vqshlSCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
if (srcElem1 < 0 && destElem >= 0) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
} else if (shiftAmt > 0) {
bool sat = false;
if (shiftAmt >= sizeof(Element) * 8) {
if (srcElem1 != 0)
sat = true;
else
destElem = 0;
} else {
if (bits(srcElem1, sizeof(Element) * 8 - 1,
sizeof(Element) * 8 - 1 - shiftAmt) !=
((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
sat = true;
} else {
destElem = srcElem1 << shiftAmt;
}
}
if (sat) {
fpscr.qc = 1;
destElem = mask(sizeof(Element) * 8 - 1);
if (srcElem1 < 0)
destElem = ~destElem;
}
} else {
destElem = srcElem1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1880
# vqrshl (unsigned): saturating rounding shift by a per-element register
# amount.  Negative amounts shift right and add the last bit shifted out as a
# rounding bit.  Positive amounts shift left, saturating to
# mask(sizeof(Element) * 8) and setting qc when a set bit would be lost.
#
# A zero amount is handled on its own and simply copies srcElem1, matching
# vqshlUCode and vqrshlSCode.  Letting it fall into the left-shift branch
# would ask bits() for the empty field [N - 1 : N], whose low index equals
# the element width (bits() is defined elsewhere; if it shifts by that index
# this is a full-width shift for the widest element type).
vqrshlUCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
Element rBit = 0;
if (shiftAmt <= sizeof(Element) * 8)
rBit = bits(srcElem1, shiftAmt - 1);
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
destElem += rBit;
} else if (shiftAmt > 0) {
if (shiftAmt >= sizeof(Element) * 8) {
if (srcElem1 != 0) {
destElem = mask(sizeof(Element) * 8);
fpscr.qc = 1;
} else {
destElem = 0;
}
} else {
if (bits(srcElem1, sizeof(Element) * 8 - 1,
sizeof(Element) * 8 - shiftAmt)) {
destElem = mask(sizeof(Element) * 8);
fpscr.qc = 1;
} else {
destElem = srcElem1 << shiftAmt;
}
}
} else {
destElem = srcElem1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1918
# vqrshl (signed): saturating rounding shift by a per-element register
# amount.  The right-shift path is the one in vrshlCode (sign bits ORed in,
# rounding bit added, written here with direct < 0 / >= 0 tests).  The
# left-shift path is the one in vqshlSCode (saturate and set qc when the top
# shiftAmt + 1 bits are not all copies of the sign).  A zero amount copies
# srcElem1.
vqrshlSCode = '''
int16_t shiftAmt = (int8_t)srcElem2;
FPSCR fpscr = (FPSCR) FpscrQc;
if (shiftAmt < 0) {
shiftAmt = -shiftAmt;
Element rBit = 0;
if (shiftAmt <= sizeof(Element) * 8)
rBit = bits(srcElem1, shiftAmt - 1);
if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
rBit = 1;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
if (srcElem1 < 0 && destElem >= 0) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
destElem += rBit;
} else if (shiftAmt > 0) {
bool sat = false;
if (shiftAmt >= sizeof(Element) * 8) {
if (srcElem1 != 0)
sat = true;
else
destElem = 0;
} else {
if (bits(srcElem1, sizeof(Element) * 8 - 1,
sizeof(Element) * 8 - 1 - shiftAmt) !=
((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
sat = true;
} else {
destElem = srcElem1 << shiftAmt;
}
}
if (sat) {
fpscr.qc = 1;
destElem = mask(sizeof(Element) * 8 - 1);
if (srcElem1 < 0)
destElem = ~destElem;
}
} else {
destElem = srcElem1;
}
FpscrQc = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1970
# Absolute-difference family.  The difference is always formed as
# larger - smaller, so it is never negative.

# vaba: absolute difference accumulated into the old destElem (trailing True
# requests the destination read).
vabaCode = '''
destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
(srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: as vaba, with the operands cast to BigElement before subtracting.
vabalCode = '''
destElem += (srcElem1 > srcElem2) ?
((BigElement)srcElem1 - (BigElement)srcElem2) :
((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)

# vabd: absolute difference, no accumulation.
vabdCode = '''
destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
(srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: as vabd, with the operands cast to BigElement before subtracting.
vabdlCode = '''
destElem = (srcElem1 > srcElem2) ?
((BigElement)srcElem1 - (BigElement)srcElem2) :
((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1996
# vtst: test bits.  Produces an all-ones element when the operands share at
# least one set bit, and 0 otherwise.
vtstCode = '''
destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2002
# vmul: element-wise product of the two sources.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: product of the two sources after casting each to BigElement.
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2012
# vmla: multiply the two sources and add the product to destElem.
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destElem) -- confirm against the helper.
threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: same accumulation with both sources cast to BigElement before the
# multiplication.
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2022
# vqdmlal: doubling multiply, then accumulate into destElem, setting
# fpscr.qc whenever a result is replaced by a saturated value.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2 (computed in int64_t).  maxNeg is
#         the Element value with only its top bit set and halfNeg is
#         maxNeg / 2.  For the three operand pairs listed in the first "if",
#         midElem is replaced by ~(maxNeg << element-bits) and qc is set.
# Step 2: midElem is added to destElem.  If destElem and midElem had the same
#         sign before the add and the sum has the other sign, destElem is
#         replaced by mask(BigElement-bits - 1), complemented when the
#         original destElem was negative, and qc is set.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2047
# vqdmlsl: doubling multiply, then subtract from destElem, setting fpscr.qc
# whenever a result is replaced by a saturated value.
#
# Step 1: midElem = 2 * srcElem1 * srcElem2 (computed in int64_t).  maxNeg is
#         the Element value with only its top bit set and halfNeg is
#         maxNeg / 2.  For the three operand pairs listed in the first "if",
#         midElem is replaced by ~(maxNeg << element-bits) and qc is set.
# Step 2: midElem is subtracted from destElem.  posMid is the sign of
#         -midElem, i.e. of the value effectively being added.  If destElem
#         and -midElem had the same sign and the result has the other sign,
#         destElem is replaced by mask(BigElement-bits - 1), complemented
#         when the original destElem was negative, and qc is set.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2072
# vqdmull: destElem = 2 * srcElem1 * srcElem2 (computed in int64_t).
# When both sources equal the Element value with only its top bit set, the
# result is replaced by ~(srcElem1 << element-bits) and fpscr.qc is set.
vqdmullCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (Element)(sizeof(Element) * 8 - 1))) {
        destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
# NOTE(review): the op class here is "SimdMultAccOp" although the snippet does
# not read destElem and no read-destination argument is passed -- confirm the
# op class is intended.
threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2085
# vmls: multiply the two sources and subtract the product from destElem.
vmlsCode = '''
    destElem = destElem - srcElem1 * srcElem2;
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destElem) -- confirm against the helper.
threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: same with both sources cast to BigElement before the multiplication.
vmlslCode = '''
    destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2095
# Polynomial variant of vmul: for every set bit j of srcElem2, srcElem1
# shifted left by j is XORed into destElem (partial products are combined
# with XOR instead of addition).
vmulpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= srcElem1 << j;
    }
'''
threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Polynomial variant of vmull: identical loop, but srcElem1 is cast to
# BigElement before shifting so the shifted-out bits are kept.
vmullpCode = '''
    destElem = 0;
    for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
        if (bits(srcElem2, j))
            destElem ^= (BigElement)srcElem1 << j;
    }
'''
threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2113
# Integer pairwise maximum / minimum, reusing vmaxCode / vminCode with
# pairwise=True.
#
# Each class is registered exactly once.  Registering "VpmaxD" / "VpminD" a
# second time (previously also done with allTypes) would emit the same
# generated class twice.  Integer VPMAX / VPMIN are only encodable in the
# doubleword form with 8-, 16- and 32-bit elements, so only the smallTypes
# D-form registration is kept: no Q variant and no 64-bit elements.
threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)

threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2117
# vqdmulh: destElem is 2 * srcElem1 * srcElem2 (computed in int64_t) shifted
# right by the element width, i.e. the upper half of the doubled product.
# When both sources equal the Element value with only its top bit set, the
# result is replaced by ~srcElem1 and fpscr.qc is set.
vqdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
               (sizeof(Element) * 8);
    if (srcElem1 == srcElem2 &&
            srcElem1 == (Element)((Element)1 <<
                (sizeof(Element) * 8 - 1))) {
        destElem = ~srcElem1;
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2132
# vqrdmulh: like vqdmulh, but 1 << (element-bits - 1) is added to the doubled
# product before the right shift by the element width (rounding).
# maxNeg is the Element value with only its top bit set, halfNeg is
# maxNeg / 2.  For the three operand pairs listed in the "if", the result is
# replaced -- by mask(element-bits - 1) when the computed destElem is
# negative, otherwise by the top-bit-only value -- and fpscr.qc is set.
vqrdmulhCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
                ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        if (destElem < 0) {
            destElem = mask(sizeof(Element) * 8 - 1);
        } else {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        }
        fpscr.qc = 1;
    }
    FpscrQc = fpscr;
'''
threeEqualRegInst("vqrdmulh", "VqrdmulhD",
        "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
        "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2156
# Floating point vmax.  processNans() gets the first chance to produce the
# result and reports through "done" whether it did.  If it did not, the
# result comes from binaryOp() with fpMaxS.  If it did and
# flushToZero(srcReg1, srcReg2) returns true, fpscr.idc is set.  The local
# fpscr copy is written back to FpscrExc at the end.
vmaxfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2171
# Floating point vmin.  processNans() gets the first chance to produce the
# result and reports through "done" whether it did.  If it did not, the
# result comes from binaryOp() with fpMinS.  If it did and
# flushToZero(srcReg1, srcReg2) returns true, fpscr.idc is set.  The local
# fpscr copy is written back to FpscrExc at the end.
vminfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    bool done;
    destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
    if (!done) {
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
                           true, true, VfpRoundNearest);
    } else if (flushToZero(srcReg1, srcReg2)) {
        fpscr.idc = 1;
    }
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2186
# Floating point pairwise max / min: the vmax / vmin snippets are reused with
# pairwise=True.  Rows are (mnemonic, class name, fifth helper argument,
# snippet) and are registered in the listed order.
for _pwMnem, _pwClass, _pwRegs, _pwCode in (
        ("vpmax", "VpmaxDFp", 2, vmaxfpCode),
        ("vpmax", "VpmaxQFp", 4, vmaxfpCode),
        ("vpmin", "VpminDFp", 2, vminfpCode),
        ("vpmin", "VpminQFp", 4, vminfpCode)):
    threeEqualRegInstFp(_pwMnem, _pwClass, "SimdFloatCmpOp", ("float",),
                        _pwRegs, _pwCode, pairwise=True)
# Do not leave the loop variables behind in the shared namespace.
del _pwMnem, _pwClass, _pwRegs, _pwCode
2196
# Floating point vadd: binaryOp() with fpAddS on the two source registers;
# the local fpscr copy is written back to FpscrExc afterwards.
vaddfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)

# vpadd (floating point) reuses the same snippet with pairwise=True.
threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                    2, vaddfpCode, pairwise=True)
threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                    4, vaddfpCode, pairwise=True)
2210
# Floating point vsub: binaryOp() with fpSubS on the two source registers;
# the local fpscr copy is written back to FpscrExc afterwards.
vsubfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2219
# Floating point vmul: binaryOp() with fpMulS on the two source registers;
# the local fpscr copy is written back to FpscrExc afterwards.
vmulfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2228
# Floating point vmla, done as two separate binaryOp() steps: the product of
# the sources (fpMulS) is stored in "mid", then mid and destReg are added
# (fpAddS).  Both steps share the same local fpscr copy, which is written
# back to FpscrExc at the end.
vmlafpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destReg) -- confirm against threeEqualRegInstFp.
threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2239
# Floating point vmls, done as two separate binaryOp() steps: the product of
# the sources (fpMulS) is stored in "mid", then mid is subtracted from
# destReg (fpSubS with destReg as the first operand).  The local fpscr copy
# is written back to FpscrExc at the end.
vmlsfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                         true, true, VfpRoundNearest);
    destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destReg) -- confirm against threeEqualRegInstFp.
threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2250
# Floating point vcgt.  binaryOp() is run with vcgtFunc and its float result
# "res" is decoded here: res == 0 yields -1 in destReg, anything else yields
# 0, and res == 2.0 additionally sets fpscr.ioc.  The meaning of the
# individual res values is defined by vcgtFunc, which is not visible here.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
        2, vcgtfpCode, toInt = True)
threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
        4, vcgtfpCode, toInt = True)
2264
# Floating point vcge.  binaryOp() is run with vcgeFunc and its float result
# "res" is decoded here: res == 0 yields -1 in destReg, anything else yields
# 0, and res == 2.0 additionally sets fpscr.ioc.  The meaning of the
# individual res values is defined by vcgeFunc, which is not visible here.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
        2, vcgefpCode, toInt = True)
threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
        4, vcgefpCode, toInt = True)
2278
# Floating point vacgt.  binaryOp() is run with vacgtFunc and its float
# result "res" is decoded here: res == 0 yields -1 in destReg, anything else
# yields 0, and res == 2.0 additionally sets fpscr.ioc.  The meaning of the
# individual res values is defined by vacgtFunc, which is not visible here.
vacgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
        2, vacgtfpCode, toInt = True)
threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
        4, vacgtfpCode, toInt = True)
2292
# Floating point vacge.  binaryOp() is run with vacgeFunc and its float
# result "res" is decoded here: res == 0 yields -1 in destReg, anything else
# yields 0, and res == 2.0 additionally sets fpscr.ioc.  The meaning of the
# individual res values is defined by vacgeFunc, which is not visible here.
vacgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
        2, vacgefpCode, toInt = True)
threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
        4, vacgefpCode, toInt = True)
2306
# Floating point vceq.  binaryOp() is run with vceqFunc and its float result
# "res" is decoded here: res == 0 yields -1 in destReg, anything else yields
# 0, and res == 2.0 additionally sets fpscr.ioc.  The meaning of the
# individual res values is defined by vceqFunc, which is not visible here.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
        2, vceqfpCode, toInt = True)
threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
        4, vceqfpCode, toInt = True)
2320
# vrecps: binaryOp() with fpRecpsS on the two source registers; the local
# fpscr copy is written back to FpscrExc afterwards.
vrecpsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2329
# vrsqrts: binaryOp() with fpRSqrtsS on the two source registers; the local
# fpscr copy is written back to FpscrExc afterwards.
vrsqrtsCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                       true, true, VfpRoundNearest);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2338
# Floating point vabd: subtract the sources with binaryOp()/fpSubS, then take
# fabs() of that difference.  The local fpscr copy is written back to
# FpscrExc at the end.
vabdfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                         true, true, VfpRoundNearest);
    destReg = fabs(mid);
    FpscrExc = fpscr;
'''
threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2348
# Variants of the multiply family generated through the two* helpers, reusing
# the snippets of the three-register forms.  Every row is (helper, positional
# arguments); rows are registered strictly in the order listed.
_twoRegMulTable = (
    # vmla / vmlal
    (twoEqualRegInst,
     ("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)),
    (twoEqualRegInst,
     ("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)),
    (twoEqualRegInstFp,
     ("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)),
    (twoEqualRegInstFp,
     ("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)),
    (twoRegLongInst,
     ("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)),

    # vmls / vmlsl
    (twoEqualRegInst,
     ("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)),
    (twoEqualRegInst,
     ("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)),
    (twoEqualRegInstFp,
     ("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)),
    (twoEqualRegInstFp,
     ("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)),
    (twoRegLongInst,
     ("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)),

    # vmul / vmull
    (twoEqualRegInst,
     ("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)),
    (twoEqualRegInst,
     ("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)),
    (twoEqualRegInstFp,
     ("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)),
    (twoEqualRegInstFp,
     ("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)),
    (twoRegLongInst,
     ("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)),

    # saturating doubling multiplies
    (twoRegLongInst,
     ("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)),
    (twoRegLongInst,
     ("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)),
    (twoRegLongInst,
     ("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)),
    (twoEqualRegInst,
     ("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)),
    (twoEqualRegInst,
     ("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)),
    (twoEqualRegInst,
     ("vqrdmulh", "VqrdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)),
    (twoEqualRegInst,
     ("vqrdmulh", "VqrdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)),
)
for _twoRegHelper, _twoRegArgs in _twoRegMulTable:
    _twoRegHelper(*_twoRegArgs)
# Do not leave the table or loop variables behind in the shared namespace.
del _twoRegMulTable, _twoRegHelper, _twoRegArgs
2376
# vshr (immediate): right shift of srcElem1 by imm.  When imm is at least the
# element width the shift is not performed; the result is -1 if ltz(srcElem1)
# is true and 0 otherwise.
vshrCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        if (ltz(srcElem1))
            destElem = -1;
        else
            destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2389
# vsra: shift srcElem1 right by imm and add the shifted value to destElem.
# For imm >= element width the shifted value is -1 or 0 depending on
# ltz(srcElem1).  Otherwise, if the source is negative but the shifted value
# is not, the bits above position (element-bits - 1 - imm) are filled in by
# hand so the shifted value is sign extended.
vsraCode = '''
    Element mid;;
    if (imm >= sizeof(srcElem1) * 8) {
        mid = ltz(srcElem1) ? -1 : 0;
    } else {
        mid = srcElem1 >> imm;
        if (ltz(srcElem1) && !ltz(mid)) {
            mid |= -(mid & ((Element)1 <<
                        (sizeof(Element) * 8 - 1 - imm)));
        }
    }
    destElem += mid;
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destElem) -- confirm against twoRegShiftInst.
twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2405
# vrshr: rounding right shift by imm.  rBit is bit (imm - 1) of the source,
# i.e. the last bit shifted out, and is added to the shifted value.  The
# shift is done as (imm - 1) followed by 1, presumably so that imm equal to
# the element width never becomes a single full-width shift.  imm greater
# than the element width gives 0; imm == 0 copies the source.
vrshrCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2418
# vrsra: rounding right shift by imm, accumulated into destElem.  rBit is bit
# (imm - 1) of the source and is added to the shifted value; the shift is
# done as (imm - 1) followed by 1.  imm greater than the element width adds
# 0; imm == 0 adds the unshifted source.
vrsraCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem += 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem += srcElem1;
    }
'''
# The trailing positional True is presumably the helper's read-destination
# flag (the snippet reads destElem) -- confirm against twoRegShiftInst.
twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2431
# vsri: shift right and insert.  The source shifted right by imm is ORed with
# the destination bits selected by ~mask(element-bits - imm), i.e. the top
# imm bits of destElem are preserved.  For imm >= element width destElem is
# left unchanged.
vsriCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 >> imm) |
            (destElem & ~mask(sizeof(Element) * 8 - imm));
'''
twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2441
# vshl (immediate): left shift of srcElem1 by imm.  For imm >= element width
# the shift is performed as (element-bits - 1) followed by 1 instead of a
# single shift by imm.
vshlCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
    else
        destElem = srcElem1 << imm;
'''
twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2450
# vsli: shift left and insert.  The source shifted left by imm is ORed with
# the low imm bits of destElem (mask(imm)), which are preserved.  For
# imm >= element width destElem is left unchanged.
vsliCode = '''
    if (imm >= sizeof(Element) * 8)
        destElem = destElem;
    else
        destElem = (srcElem1 << imm) | (destElem & mask(imm));
'''
twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2459
# vqshl (immediate), registered for signedTypes: saturating left shift.
#  - imm >= element width: any non-zero source saturates; zero stays zero.
#  - 0 < imm < element width: topBits holds the top (imm + 1) bits of the
#    source.  Unless they are all zero or all one (mask(imm + 1)) the shifted
#    value is discarded and the result saturates.
#  - imm == 0: the source is copied.
# The saturated value is the Element with only its top bit set, complemented
# when the source is positive; fpscr.qc is set whenever it is used.
vqshlCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - 1 - imm);
        if (topBits != 0 && topBits != mask(imm + 1)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (srcElem1 > 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2489
# vqshlu, registered for unsignedTypes: saturating left shift.
#  - imm >= element width: any non-zero source saturates; zero stays zero.
#  - 0 < imm < element width: topBits holds the top imm bits of the source
#    (the bits that would be shifted out); if any is set the result
#    saturates.
#  - imm == 0: the source is copied.
# The saturated value is mask(element-bits); fpscr.qc is set whenever it is
# used.
vqshluCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2515
# vqshlus, registered for signedTypes: saturating left shift whose result is
# clamped to the range [0, mask(element-bits)].
#  - A negative source always produces 0 and sets fpscr.qc, for every imm
#    (including imm == 0).
#  - imm >= element width: a positive source saturates to mask(element-bits)
#    with qc set; zero stays zero.
#  - 0 < imm < element width: topBits holds the top imm bits of the source;
#    if any is set the result saturates to mask(element-bits) with qc set.
#  - imm == 0 and non-negative source: the source is copied.
vqshlusCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm >= sizeof(Element) * 8) {
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (srcElem1 > 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = 0;
        }
    } else if (imm) {
        destElem = (srcElem1 << imm);
        uint64_t topBits = bits((uint64_t)srcElem1,
                                sizeof(Element) * 8 - 1,
                                sizeof(Element) * 8 - imm);
        if (srcElem1 < 0) {
            destElem = 0;
            fpscr.qc = 1;
        } else if (topBits != 0) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2552
# vshrn: right shift of srcElem1 by imm, generated through the narrowing
# shift helper.  A shift of at least the width of srcElem1 yields 0.
vshrnCode = '''
    if (imm >= sizeof(srcElem1) * 8) {
        destElem = 0;
    } else {
        destElem = srcElem1 >> imm;
    }
'''
twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2561
# vrshrn: rounding right shift by imm, generated through the narrowing shift
# helper.  rBit is bit (imm - 1) of the source and is added to the shifted
# value; the shift is done as (imm - 1) followed by 1.  imm greater than the
# width of srcElem1 gives 0; imm == 0 copies the source.
vrshrnCode = '''
    if (imm > sizeof(srcElem1) * 8) {
        destElem = 0;
    } else if (imm) {
        Element rBit = bits(srcElem1, imm - 1);
        destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
    } else {
        destElem = srcElem1;
    }
'''
twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2573
# vqshrn, registered for smallSignedTypes: saturating right shift and narrow.
#  - imm > width of srcElem1: result 0; fpscr.qc is set unless the source is
#    0 or -1.
#  - imm != 0: the source is shifted in two steps ((imm - 1), then 1) into
#    the BigElement "mid", which is then sign extended by hand from bit
#    (BigElement-bits - 1 - imm).  If mid does not survive a round trip
#    through Element, the result saturates to mask(element-bits - 1)
#    (complemented for a negative source) and qc is set.
#  - imm == 0: the source is assigned without a saturation check.
vqshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2598
# Snippet behind the "NVqshrun" class, registered for smallUnsignedTypes:
# saturating right shift and narrow.
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero
#    source.
#  - imm != 0: the source is shifted in two steps ((imm - 1), then 1) into
#    the BigElement "mid".  If mid does not survive a round trip through
#    Element, the result saturates to mask(element-bits) and qc is set.
#  - imm == 0: the source is assigned without a saturation check.
vqshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshrun",
        "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2620
# Snippet behind the "NVqshruns" class (mnemonic "vqshrun"), registered for
# smallSignedTypes: saturating right shift and narrow with a result clamped
# to [0, mask(element-bits)].
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero
#    source.
#  - imm != 0: the source is shifted in two steps ((imm - 1), then 1) into
#    the BigElement "mid".  If any bit of mid above the Element width is set,
#    the result saturates -- to 0 for a negative source, otherwise to
#    mask(element-bits) -- and qc is set.
#  - imm == 0: the source is assigned without a saturation check.
vqshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqshrun", "NVqshruns",
        "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2647
# vqrshrn, registered for smallSignedTypes: saturating rounding right shift
# and narrow.
#  - imm > width of srcElem1: result 0; fpscr.qc is set unless the source is
#    0 or -1.
#  - imm != 0: the source is shifted by (imm - 1); the low bit of that value
#    is kept as the rounding bit rBit; the value is shifted by one more,
#    sign extended by hand from bit (BigElement-bits - 1 - imm), and rBit is
#    added.  If the result does not survive a round trip through Element it
#    saturates to mask(element-bits - 1) (complemented for a negative
#    source) and qc is set.
#  - imm == 0: the same round-trip check and saturation are applied directly
#    to the source.
vqrshrnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0 && srcElem1 != -1)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                    (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
        "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2683
# Snippet behind the "NVqrshrun" class, registered for smallUnsignedTypes:
# saturating rounding right shift and narrow.
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero
#    source.
#  - imm != 0: the source is shifted by (imm - 1); the low bit of that value
#    is kept as the rounding bit rBit; the value is shifted by one more and
#    rBit is added.  If the result does not survive a round trip through
#    Element it saturates to mask(element-bits) and qc is set.
#  - imm == 0: the round-trip check is applied directly to the source.
# NOTE(review): the imm == 0 branch saturates to mask(element-bits - 1)
# while the imm != 0 branch uses mask(element-bits) -- confirm the
# difference is intended.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8 - 1);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
        "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2713
# Snippet behind the "NVqrshruns" class (mnemonic "vqrshrun"), registered for
# smallSignedTypes: saturating rounding right shift and narrow with a result
# clamped to [0, mask(element-bits)].
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero
#    source.
#  - imm != 0: the source is shifted by (imm - 1); the low bit of that value
#    is kept as the rounding bit rBit; the value is shifted by one more,
#    sign extended by hand from bit (BigElement-bits - 1 - imm), and rBit is
#    added.  If any bit above the Element width is set, the result
#    saturates -- to 0 for a negative source, otherwise to
#    mask(element-bits) -- and qc is set.
#  - imm == 0: a negative source gives 0 with qc set; otherwise the source
#    is copied.
vqrshrunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
        mid += rBit;
        if (bits(mid, sizeof(BigElement) * 8 - 1,
                      sizeof(Element) * 8) != 0) {
            if (srcElem1 < 0) {
                destElem = 0;
            } else {
                destElem = mask(sizeof(Element) * 8);
            }
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 < 0) {
            fpscr.qc = 1;
            destElem = 0;
        } else {
            destElem = srcElem1;
        }
    }
    FpscrQc = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
        "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2750
# vshll: the source is cast to BigElement and shifted left by imm.  A shift
# of at least the width of destElem yields 0.
vshllCode = '''
    if (imm >= sizeof(destElem) * 8) {
        destElem = 0;
    } else {
        destElem = (BigElement)srcElem1 << imm;
    }
'''
twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2759
# vmovl: plain copy of the source element into the destination element,
# generated through the same long-shift helper as vshll.
vmovlCode = '''
    destElem = srcElem1;
'''
twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2764
# vcvt, float to fixed point via vfpFpSToFixed(srcElem1, false, false, imm).
# fpscr.idc is set first if flushToZero(srcElem1) returns true.  The
# conversion runs between prepFpState(VfpRoundNearest) and finishVfp(); the
# empty asm statements with "m" constraints presumably keep the compiler
# from moving the conversion out of that window -- confirm.
vcvt2ufxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, false, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
        2, vcvt2ufxCode, toInt = True)
twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
        4, vcvt2ufxCode, toInt = True)
2780
# vcvt, float to fixed point via vfpFpSToFixed(srcElem1, true, false, imm);
# the second argument is the only difference from the snippet behind the
# NVcvt2ufx classes.  fpscr.idc is set first if flushToZero(srcElem1) returns
# true.  The conversion runs between prepFpState(VfpRoundNearest) and
# finishVfp(); the empty asm statements with "m" constraints presumably keep
# the compiler from moving the conversion out of that window -- confirm.
vcvt2sfxCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcElem1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
    destReg = vfpFpSToFixed(srcElem1, true, false, imm);
    __asm__ __volatile__("" :: "m" (destReg));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
        2, vcvt2sfxCode, toInt = True)
twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
        4, vcvt2sfxCode, toInt = True)
2796
# vcvt, fixed point to float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm).  The conversion runs between prepFpState(VfpRoundNearest) and
# finishVfp(); the empty asm statements with "m" constraints presumably keep
# the compiler from moving the conversion out of that window -- confirm.
vcvtu2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
        2, vcvtu2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
        4, vcvtu2fpCode, fromInt = True)
2810
# vcvt, fixed point to float via vfpSFixedToFpS(true, true, srcReg1, false,
# imm).  The conversion runs between prepFpState(VfpRoundNearest) and
# finishVfp(); the empty asm statements with "m" constraints presumably keep
# the compiler from moving the conversion out of that window -- confirm.
vcvts2fpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
    destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
        2, vcvts2fpCode, fromInt = True)
twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
        4, vcvts2fpCode, fromInt = True)
2824
# vcvt through vcvtFpSFpH().  The source element is first reinterpreted as a
# float with bitsToFp(); fpscr.idc is set if flushToZero() reports true for
# it.  fpscr.ahp is forwarded to the conversion routine.  The conversion
# runs between prepFpState(VfpRoundNearest) and finishVfp(); the empty asm
# statements with "m" constraints presumably keep the compiler from moving
# it out of that window -- confirm.
vcvts2hCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float srcFp1 = bitsToFp(srcElem1, (float)0.0);
    if (flushToZero(srcFp1))
        fpscr.idc = 1;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                            : "m" (srcFp1), "m" (destElem));
    destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                          fpscr.ahp, srcFp1);
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2840
# vcvt through vcvtFpHFpS(), with fpscr.ahp forwarded to the conversion
# routine; its result is turned back into a bit pattern with fpToBits()
# before being stored in destElem.  The conversion runs between
# prepFpState(VfpRoundNearest) and finishVfp(); the empty asm statements with
# "m" constraints presumably keep the compiler from moving it out of that
# window -- confirm.
vcvth2sCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    VfpSavedState state = prepFpState(VfpRoundNearest);
    __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                            : "m" (srcElem1), "m" (destElem));
    destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
    __asm__ __volatile__("" :: "m" (destElem));
    finishVfp(fpscr, state, true);
    FpscrExc = fpscr;
'''
twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2852
# vrsqrte (integer): delegates to unsignedRSqrtEstimate(); registered for
# uint32_t elements only.
vrsqrteCode = '''
    destElem = unsignedRSqrtEstimate(srcElem1);
'''
twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2858
# vrsqrte (floating point): sets fpscr.idc if flushToZero(srcReg1) returns
# true, then delegates to fprSqrtEstimate().  The local fpscr copy is written
# back to FpscrExc at the end.
vrsqrtefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fprSqrtEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2868
# vrecpe (integer): delegates to unsignedRecipEstimate(); registered for
# uint32_t elements only.
vrecpeCode = '''
    destElem = unsignedRecipEstimate(srcElem1);
'''
twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2874
# vrecpe (floating point): sets fpscr.idc if flushToZero(srcReg1) returns
# true, then delegates to fpRecipEstimate().  The local fpscr copy is written
# back to FpscrExc at the end.
vrecpefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    if (flushToZero(srcReg1))
        fpscr.idc = 1;
    destReg = fpRecipEstimate(fpscr, srcReg1);
    FpscrExc = fpscr;
'''
twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2884
# vrev16 / vrev32 / vrev64: the element value is copied unchanged and the
# index j is derived from index i.  groupSize is the number of elements in
# a 2-, 4- or 8-byte group ((1 << n) / sizeof(Element)); XORing i with
# (groupSize - 1) mirrors the position within its group.  i and j are not
# declared in these snippets; presumably they are the source and destination
# element indices supplied by the twoRegMiscInst template -- confirm there.
vrev16Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 1) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
vrev32Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 2) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev32", "NVrev32D",
        "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
twoRegMiscInst("vrev32", "NVrev32Q",
        "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
vrev64Code = '''
    destElem = srcElem1;
    unsigned groupSize = ((1 << 3) / sizeof(Element));
    unsigned reverseMask = (groupSize - 1);
    j = i ^ reverseMask;
'''
twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2911
# VPADDL: the destination element is the sum of the two source elements
# handed in by twoRegCondenseInst, each widened to BigElement before adding.
vpaddlCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
for neonClass, regCount in (("NVpaddlD", 2), ("NVpaddlQ", 4)):
    twoRegCondenseInst("vpaddl", neonClass, "SimdAddOp", smallTypes,
                       regCount, vpaddlCode)
2917
# VPADAL: like VPADDL, but the widened pair sum is accumulated into the
# existing destination element (+=).
# NOTE(review): the trailing positional True presumably tells
# twoRegCondenseInst that the destination is also a source -- confirm.
vpadalCode = '''
    destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
'''
for neonClass, regCount in (("NVpadalD", 2), ("NVpadalQ", 4)):
    twoRegCondenseInst("vpadal", neonClass, "SimdAddAccOp", smallTypes,
                       regCount, vpadalCode, True)
2923
# VCLS: the top bit is shifted out first, then the loop counts how many of
# the following bits keep the same sign as the original value.  The count is
# capped at (element width - 1).
vclsCode = '''
    unsigned count = 0;
    if (srcElem1 < 0) {
        srcElem1 <<= 1;
        while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    } else {
        srcElem1 <<= 1;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
            count++;
            srcElem1 <<= 1;
        }
    }
    destElem = count;
'''
for neonClass, regCount in (("NVclsD", 2), ("NVclsQ", 4)):
    twoRegMiscInst("vcls", neonClass, "SimdAluOp", signedTypes,
                   regCount, vclsCode)
2943
# VCLZ: shifts left while the value is non-negative (top bit clear),
# counting the steps; the count is capped at the element width.  The
# ">= 0" test only detects the top bit because the snippet is instantiated
# with signedTypes.
vclzCode = '''
    unsigned count = 0;
    while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
        count++;
        srcElem1 <<= 1;
    }
    destElem = count;
'''
for neonClass, regCount in (("NVclzD", 2), ("NVclzQ", 4)):
    twoRegMiscInst("vclz", neonClass, "SimdAluOp", signedTypes,
                   regCount, vclzCode)
2954
# VCNT: adds up the low bit of the element while shifting it right, i.e.
# counts the set bits; stops early once the remaining value is zero.
vcntCode = '''
    unsigned count = 0;
    while (srcElem1 && count < sizeof(Element) * 8) {
        count += srcElem1 & 0x1;
        srcElem1 >>= 1;
    }
    destElem = count;
'''

for neonClass, regCount in (("NVcntD", 2), ("NVcntQ", 4)):
    twoRegMiscInst("vcnt", neonClass, "SimdAluOp", unsignedTypes,
                   regCount, vcntCode)
2966
# VMVN (register form): bitwise complement of each 64-bit element.
vmvnCode = '''
    destElem = ~srcElem1;
'''
for neonClass, regCount in (("NVmvnD", 2), ("NVmvnQ", 4)):
    twoRegMiscInst("vmvn", neonClass, "SimdAluOp", ("uint64_t",),
                   regCount, vmvnCode)
2972
# VQABS: absolute value with saturation.  When the input has only its top
# bit set (the one value whose negation does not fit), fpscr.qc is set and
# the result is the complement of the input; otherwise the plain absolute
# value is produced.  The flag copy is written back to FpscrQc.
vqabsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
    FpscrQc = fpscr;
'''
for neonClass, regCount in (("NVqabsD", 2), ("NVqabsQ", 4)):
    twoRegMiscInst("vqabs", neonClass, "SimdAluOp", signedTypes,
                   regCount, vqabsCode)
2987
# VQNEG: negation with saturation.  An input with only its top bit set
# yields the complement of the input and sets fpscr.qc; every other input is
# simply negated.
vqnegCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
        fpscr.qc = 1;
        destElem = ~srcElem1;
    } else {
        destElem = -srcElem1;
    }
    FpscrQc = fpscr;
'''
for neonClass, regCount in (("NVqnegD", 2), ("NVqnegQ", 4)):
    twoRegMiscInst("vqneg", neonClass, "SimdAluOp", signedTypes,
                   regCount, vqnegCode)
3000
# VABS (integer form): negative elements are negated, all others are copied.
# Unlike VQABS there is no saturation handling in this snippet.
vabsCode = '''
    if (srcElem1 < 0) {
        destElem = -srcElem1;
    } else {
        destElem = srcElem1;
    }
'''

for neonClass, regCount in (("NVabsD", 2), ("NVabsQ", 4)):
    twoRegMiscInst("vabs", neonClass, "SimdAluOp", signedTypes,
                   regCount, vabsCode)
# VABS (floating point form): reinterprets the float through a union and
# clears the top bit by masking with mask(sizeof(Element) * 8 - 1), leaving
# all remaining bits untouched.
vabsfpCode = '''
    union
    {
        uint32_t i;
        float f;
    } cStruct;
    cStruct.f = srcReg1;
    cStruct.i &= mask(sizeof(Element) * 8 - 1);
    destReg = cStruct.f;
'''
for neonClass, regCount in (("NVabsDFp", 2), ("NVabsQFp", 4)):
    twoRegMiscInstFp("vabs", neonClass, "SimdFloatAluOp", ("float",),
                     regCount, vabsfpCode)
3023
# VNEG (integer form): arithmetic negation of each signed element.
vnegCode = '''
    destElem = -srcElem1;
'''
for neonClass, regCount in (("NVnegD", 2), ("NVnegQ", 4)):
    twoRegMiscInst("vneg", neonClass, "SimdAluOp", signedTypes,
                   regCount, vnegCode)
# VNEG (floating point form): negates the source register value.
vnegfpCode = '''
    destReg = -srcReg1;
'''
for neonClass, regCount in (("NVnegDFp", 2), ("NVnegQFp", 4)):
    twoRegMiscInstFp("vneg", neonClass, "SimdFloatAluOp", ("float",),
                     regCount, vnegfpCode)
3034
# VCGT against zero (integer form): all-ones element when srcElem1 > 0,
# otherwise zero.
vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
for neonClass, regCount in (("NVcgtD", 2), ("NVcgtQ", 4)):
    twoRegMiscInst("vcgt", neonClass, "SimdCmpOp", signedTypes,
                   regCount, vcgtCode)

# VCGT against zero (floating point form): binaryOp() runs vcgtFunc on the
# source and 0.0.  A result of 0 is turned into -1 (all bits set), any other
# result into 0; a result of exactly 2.0 additionally sets fpscr.ioc.
vcgtfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for neonClass, regCount in (("NVcgtDFp", 2), ("NVcgtQFp", 4)):
    twoRegMiscInstFp("vcgt", neonClass, "SimdFloatCmpOp", ("float",),
                     regCount, vcgtfpCode, toInt = True)
3051
# VCGE against zero (integer form): all-ones element when srcElem1 >= 0,
# otherwise zero.
vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
for neonClass, regCount in (("NVcgeD", 2), ("NVcgeQ", 4)):
    twoRegMiscInst("vcge", neonClass, "SimdCmpOp", signedTypes,
                   regCount, vcgeCode)

# VCGE against zero (floating point form): binaryOp() runs vcgeFunc on the
# source and 0.0.  A result of 0 becomes -1, anything else becomes 0; a
# result of exactly 2.0 additionally sets fpscr.ioc.
vcgefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for neonClass, regCount in (("NVcgeDFp", 2), ("NVcgeQFp", 4)):
    twoRegMiscInstFp("vcge", neonClass, "SimdFloatCmpOp", ("float",),
                     regCount, vcgefpCode, toInt = True)
3068
# VCEQ against zero (integer form): all-ones element when srcElem1 == 0,
# otherwise zero.
vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
for neonClass, regCount in (("NVceqD", 2), ("NVceqQ", 4)):
    twoRegMiscInst("vceq", neonClass, "SimdCmpOp", signedTypes,
                   regCount, vceqCode)

# VCEQ against zero (floating point form): binaryOp() runs vceqFunc on the
# source and 0.0.  A result of 0 becomes -1, anything else becomes 0; a
# result of exactly 2.0 additionally sets fpscr.ioc.
vceqfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for neonClass, regCount in (("NVceqDFp", 2), ("NVceqQFp", 4)):
    twoRegMiscInstFp("vceq", neonClass, "SimdFloatCmpOp", ("float",),
                     regCount, vceqfpCode, toInt = True)
3085
# VCLE against zero (integer form): all-ones element when srcElem1 <= 0,
# otherwise zero.
vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
for neonClass, regCount in (("NVcleD", 2), ("NVcleQ", 4)):
    twoRegMiscInst("vcle", neonClass, "SimdCmpOp", signedTypes,
                   regCount, vcleCode)

# VCLE against zero (floating point form): binaryOp() runs vcleFunc on the
# source and 0.0.  A result of 0 becomes -1, anything else becomes 0; a
# result of exactly 2.0 additionally sets fpscr.ioc.
vclefpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for neonClass, regCount in (("NVcleDFp", 2), ("NVcleQFp", 4)):
    twoRegMiscInstFp("vcle", neonClass, "SimdFloatCmpOp", ("float",),
                     regCount, vclefpCode, toInt = True)
3102
# VCLT against zero (integer form): all-ones element when srcElem1 < 0,
# otherwise zero.
vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
for neonClass, regCount in (("NVcltD", 2), ("NVcltQ", 4)):
    twoRegMiscInst("vclt", neonClass, "SimdCmpOp", signedTypes,
                   regCount, vcltCode)

# VCLT against zero (floating point form): binaryOp() runs vcltFunc on the
# source and 0.0.  A result of 0 becomes -1, anything else becomes 0; a
# result of exactly 2.0 additionally sets fpscr.ioc.
vcltfpCode = '''
    FPSCR fpscr = (FPSCR) FpscrExc;
    float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
                         true, true, VfpRoundNearest);
    destReg = (res == 0) ? -1 : 0;
    if (res == 2.0)
        fpscr.ioc = 1;
    FpscrExc = fpscr;
'''
for neonClass, regCount in (("NVcltDFp", 2), ("NVcltQFp", 4)):
    twoRegMiscInstFp("vclt", neonClass, "SimdFloatCmpOp", ("float",),
                     regCount, vcltfpCode, toInt = True)
3119
# VSWP: exchanges srcReg1 and destReg one register piece at a time, using
# "mid" as the temporary.  Both operands are modified.
vswpCode = '''
    FloatRegBits mid;
    for (unsigned r = 0; r < rCount; r++) {
        mid = srcReg1.regs[r];
        srcReg1.regs[r] = destReg.regs[r];
        destReg.regs[r] = mid;
    }
'''
for neonClass, regCount in (("NVswpD", 2), ("NVswpQ", 4)):
    twoRegMiscScramble("vswp", neonClass, "SimdAluOp", ("uint64_t",),
                       regCount, vswpCode)
3130
# VTRN: for every even index i, swaps srcReg1.elements[i] with
# destReg.elements[i + 1].  Both operands are modified.
vtrnCode = '''
    Element mid;
    for (unsigned i = 0; i < eCount; i += 2) {
        mid = srcReg1.elements[i];
        srcReg1.elements[i] = destReg.elements[i + 1];
        destReg.elements[i + 1] = mid;
    }
'''
# Each class is registered exactly once, with smallUnsignedTypes.  The loop
# above always touches elements[i + 1], so it needs at least two elements
# per vector.
# NOTE(review): unsignedTypes presumably also contains a 64-bit element type
# that would leave a D register with a single element -- confirm against the
# type list definitions.
twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
                   smallUnsignedTypes, 2, vtrnCode)
twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
                   smallUnsignedTypes, 4, vtrnCode)
3143
# VUZP: de-interleaves the pair (destReg, srcReg1).  "mid" keeps a copy of
# the original srcReg1.  Afterwards destReg holds the even-indexed elements
# (destReg's first, then srcReg1's) and srcReg1 holds the odd-indexed ones
# in the same order.  The second half of destReg is filled in a separate
# loop, after the first loop has finished reading destReg's original values.
vuzpCode = '''
    Element mid[eCount];
    memcpy(&mid, &srcReg1, sizeof(srcReg1));
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[i] = destReg.elements[2 * i + 1];
        srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
        destReg.elements[i] = destReg.elements[2 * i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[eCount / 2 + i] = mid[2 * i];
    }
'''
for neonClass, regCount in (("NVuzpD", 2), ("NVuzpQ", 4)):
    twoRegMiscScramble("vuzp", neonClass, "SimdAluOp", unsignedTypes,
                       regCount, vuzpCode)
3158
# VZIP: interleaves the pair (destReg, srcReg1).  "mid" keeps a copy of the
# original destReg.  The first loop fills destReg with the interleaved lower
# halves; the second loop fills srcReg1 with the interleaved upper halves.
# The second loop works in place on srcReg1: at step i it reads
# srcReg1.elements[eCount / 2 + i], which earlier steps have not yet
# overwritten.
# Both loops use an unsigned index, matching every other element loop in
# this section and avoiding a mixed-sign comparison with eCount / 2.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3173
# VMOVN: plain assignment of the source element to the destination element;
# any narrowing comes from the element types chosen by twoRegNarrowMiscInst.
vmovnCode = 'destElem = srcElem1;'
twoRegNarrowMiscInst(
    "vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3176
# VDUP (scalar form): every destination element receives the source element
# selected by the twoRegMiscScInst template.
vdupCode = 'destElem = srcElem1;'
for neonClass, regCount in (("NVdupD", 2), ("NVdupQ", 4)):
    twoRegMiscScInst("vdup", neonClass, "SimdAluOp", smallUnsignedTypes,
                     regCount, vdupCode)
3180
def vdupGprInst(name, Name, opClass, types, rCount):
    # Generate a VDUP variant whose source is the integer operand Op1.
    #
    # name    -- mnemonic handed to InstObjParams
    # Name    -- C++ class name of the generated instruction
    # opClass -- value substituted as "op_class"
    # types   -- element types; one NeonExecDeclare is emitted per entry
    # rCount  -- number of destination register pieces written back
    #
    # Appends the declaration to header_output and the execute code plus the
    # per-type declarations to exec_output.
    global header_output, exec_output
    # Fill a local vector with copies of Op1 cast to the element type.
    fillLoop = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register piece.
    writeBack = '''
    FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = fillLoop + "".join(
        [writeBack % { "reg" : regIdx } for regIdx in range(rCount)])
    substitutions = { "code": eWalkCode,
                      "r_count": rCount,
                      "predicate_test": predicateTest,
                      "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegOp", substitutions, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# Instantiate the GPR-sourced VDUP for the 2-register (D) and 4-register (Q)
# destinations.
for neonClass, regCount in (("NVdupDGpr", 2), ("NVdupQGpr", 4)):
    vdupGprInst("vdup", neonClass, "SimdMiscOp", smallUnsignedTypes, regCount)
3207
# One-register/immediate forms.  All of them operate on 64-bit elements.

# VMOV (immediate): the element becomes the immediate.
vmovCode = 'destElem = imm;'
for neonClass, regCount in (("NVmoviD", 2), ("NVmoviQ", 4)):
    oneRegImmInst("vmov", neonClass, "SimdMiscOp", ("uint64_t",),
                  regCount, vmovCode)

# VORR (immediate): ORs the immediate into the existing element.
# NOTE(review): the trailing positional True presumably tells oneRegImmInst
# that the destination is also read -- confirm.
vorrCode = 'destElem |= imm;'
for neonClass, regCount in (("NVorriD", 2), ("NVorriQ", 4)):
    oneRegImmInst("vorr", neonClass, "SimdAluOp", ("uint64_t",),
                  regCount, vorrCode, True)

# VMVN (immediate): the element becomes the complement of the immediate.
# This rebinds vmvnCode, which held the register-form snippet until here.
vmvnCode = 'destElem = ~imm;'
for neonClass, regCount in (("NVmvniD", 2), ("NVmvniQ", 4)):
    oneRegImmInst("vmvn", neonClass, "SimdAluOp", ("uint64_t",),
                  regCount, vmvnCode)

# VBIC (immediate): clears the immediate's set bits in the existing element.
vbicCode = 'destElem &= ~imm;'
for neonClass, regCount in (("NVbiciD", 2), ("NVbiciQ", 4)):
    oneRegImmInst("vbic", neonClass, "SimdAluOp", ("uint64_t",),
                  regCount, vbicCode, True)
3223
# VQMOVN (signed): narrows by assignment, then widens the result back to
# BigElement to detect lost information.  On a mismatch fpscr.qc is set and
# the result saturates to mask(width - 1), complemented when the source is
# negative.
vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3236
# Saturating narrow instantiated with smallUnsignedTypes: if the narrowed
# value does not widen back to the source, fpscr.qc is set and the result
# becomes the all-ones element mask(sizeof(Element) * 8).
vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovun", "NVqmovun", "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3248
# Saturating narrow instantiated with smallSignedTypes under the "vqmovun"
# mnemonic.  Saturation (fpscr.qc set) happens when the source is negative
# or when the narrowed value, masked to the element width, differs from the
# source.  The saturated result is mask(sizeof(Element) * 8), complemented
# when the source is negative.
vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
'''
twoRegNarrowMiscInst(
    "vqmovun", "NVqmovuns", "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3263
def buildVext(name, Name, opClass, types, rCount, op):
    # Generate a three-register-plus-immediate NEON instruction (used for
    # VEXT).
    #
    # name    -- mnemonic handed to InstObjParams
    # Name    -- C++ class name of the generated instruction
    # opClass -- value substituted as "op_class"
    # types   -- element types; one NeonExecDeclare is emitted per entry
    # rCount  -- number of register pieces read per source and written back
    # op      -- C++ snippet computing destReg from srcReg1/srcReg2
    #
    # Appends the declaration to header_output and the execute code plus the
    # per-type declarations to exec_output.
    global header_output, exec_output
    # The SIMD-enabled check does not depend on the register index, so it is
    # emitted once ahead of the operand loads rather than once per register.
    eWalkCode = '''
    RegVect srcReg1, srcReg2, destReg;
    ''' + simdEnabledCheckCode
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest,
                          "op_class": opClass }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # "elemType" rather than "type" so the builtin is not shadowed.
    for elemType in types:
        substDict = { "targs" : elemType,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
3291
# VEXT: destination element i comes from position (i + imm) of the
# concatenation srcReg1:srcReg2.  Indices below eCount read srcReg1, the
# next eCount indices read srcReg2, and anything beyond that raises an
# UndefinedInstruction fault (constructed differently depending on
# FULL_SYSTEM).
vextCode = '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned index = i + imm;
        if (index < eCount) {
            destReg.elements[i] = srcReg1.elements[index];
        } else {
            index -= eCount;
            if (index >= eCount)
#if FULL_SYSTEM
                fault = new UndefinedInstruction;
#else
                fault = new UndefinedInstruction(false, mnemonic);
#endif
            else
                destReg.elements[i] = srcReg2.elements[index];
        }
    }
'''
for neonClass, regCount in (("NVextD", 2), ("NVextQ", 4)):
    buildVext("vext", neonClass, "SimdMiscOp", ("uint8_t",),
              regCount, vextCode)
3312
def buildVtbxl(name, Name, opClass, length, isVtbl):
    # Generate one table-lookup instruction (VTBL or VTBX).
    #
    # name    -- mnemonic handed to InstObjParams
    # Name    -- C++ class name of the generated instruction
    # opClass -- value substituted as "op_class"
    # length  -- table length; 2 * length table register pieces are loaded
    #            and indices below 8 * length are treated as in range
    # isVtbl  -- text spliced in as the C++ bool initialiser ("true" or
    #            "false"); out-of-range bytes are zeroed when it is true and
    #            left unchanged otherwise
    #
    # Appends to header_output, decoder_output and exec_output.
    global header_output, decoder_output, exec_output
    # Declarations plus loads of the index vector and the old destination.
    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0_uw);
    srcReg2.regs[1] = htog(FpOp2P1_uw);

    destReg.regs[0] = htog(FpDestP0_uw);
    destReg.regs[1] = htog(FpDestP1_uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # Table pieces that belong to the table are loaded, the rest are zeroed.
    liveRegs = length * 2
    tableLoads = []
    for reg in range(8):
        if reg >= liveRegs:
            tableLoads.append('table.regs[%(reg)d] = 0;\n' % { "reg" : reg })
        else:
            tableLoads.append(
                'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' %
                { "reg" : reg })
    # Byte-wise lookup followed by the write-back of both result pieces.
    lookup = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0_uw = gtoh(destReg.regs[0]);
    FpDestP1_uw = gtoh(destReg.regs[1]);
    '''
    code = prologue + "".join(tableLoads) + lookup
    substitutions = { "code": code,
                      "predicate_test": predicateTest,
                      "op_class": opClass }
    iop = InstObjParams(name, Name, "RegRegRegOp", substitutions, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3366
# VTBL: table lengths 1 to 4; out-of-range index bytes produce zero.
for neonClass, tableLen in (("NVtbl1", 1), ("NVtbl2", 2),
                            ("NVtbl3", 3), ("NVtbl4", 4)):
    buildVtbxl("vtbl", neonClass, "SimdMiscOp", tableLen, "true")

# VTBX: table lengths 1 to 4; out-of-range index bytes leave the destination
# byte unchanged.
for neonClass, tableLen in (("NVtbx1", 1), ("NVtbx2", 2),
                            ("NVtbx3", 3), ("NVtbx4", 4)):
    buildVtbxl("vtbx", neonClass, "SimdMiscOp", tableLen, "false")
3376}};