neon.isa (7640:5286a8a469c5) neon.isa (7641:788c719d0fc8)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
# C++ prologue that the generator functions in this file prepend to each
# execute body: return disabledFault() when neonEnabled() rejects the
# current Cpacr/Cpsr/Fpexc values.
simdEnabledCheckCode = '''
    if (!neonEnabled(Cpacr, Cpsr, Fpexc))
        return disabledFault();
'''
627}};
628
629let {{
630
# Text accumulated by the generator functions below.
header_output = ""
exec_output = ""

# Element type names grouped by signedness; the "small" groups leave out
# the 64 bit types.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
640
# Generate an instruction whose two sources and destination each span
# rCount registers and share one element type.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   rCount     - number of registers loaded/stored per operand
#   op         - C++ snippet that sets destElem from srcElem1/srcElem2
#   readDest   - also load the destination so op can read destElem
#   pairwise   - hand op adjacent element pairs, walking srcReg1 first
#                and then srcReg2, instead of same-index elements
# Appends to the global header_output and exec_output.
def threeEqualRegInst(name, Name, types, rCount, op,
                      readDest=False, pairwise=False):
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for regIdx in range(rCount):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : regIdx })
        if readDest:
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx })

    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDestElem = ''

    if pairwise:
        walkTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    else:
        walkTemplate = '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        '''
    pieces.append(walkTemplate % { "op" : op, "readDest" : loadDestElem })

    for regIdx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": ''.join(pieces),
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
700
# Generate a floating point instruction whose two sources and destination
# each span rCount registers, with op applied once per register.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - types to emit NeonExecDeclare text for
#   rCount     - number of registers loaded/stored per operand
#   op         - C++ snippet that sets destReg from srcReg1/srcReg2
#   readDest   - also load the destination so op can read destReg
#   pairwise   - hand op adjacent register pairs, walking srcRegs1 first
#                and then srcRegs2, instead of same-index registers
#   toInt      - destination is kept as raw bits: destRegs is a RegVect
#                and destReg a FloatRegBits instead of FloatReg values
# Appends to the global header_output and exec_output.
def threeEqualRegInstFp(name, Name, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            # destRegs is a RegVect here, so it has to be read through
            # its regs member just like it is loaded and written.
            readDestCode = 'destReg = destRegs.regs[r];'
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
784
# Generate a three operand instruction whose operands may differ in
# width. An operand flagged "big" is held in a BigRegVect of four
# registers with BigElement elements; otherwise it is a RegVect of two
# registers with Element elements.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   op         - C++ snippet that sets destElem from srcElem1/srcElem2
#   bigSrc1, bigSrc2, bigDest - which operands use the big form
#   readDest   - also load the destination so op can read destElem
# Appends to the global header_output and exec_output.
def threeUnequalRegInst(name, Name, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 is declared with the second source's own prefix so that
    # its type matches the elements of srcReg2.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
847
# Narrowing form: both sources big, destination regular.
def threeRegNarrowInst(name, Name, types, op, readDest=False):
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
851
# Long form: both sources regular, destination big.
def threeRegLongInst(name, Name, types, op, readDest=False):
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
# Wide form: first source and destination big, second source regular.
def threeRegWideInst(name, Name, types, op, readDest=False):
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
859
# Generate an instruction that combines every element of the first source
# with the single element of the second source selected by imm. The
# emitted code asserts that imm is a valid element index.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   rCount     - number of registers loaded/stored per operand
#   op         - C++ snippet that sets destElem from srcElem1/srcElem2
#   readDest   - also load the destination so op can read destElem
# Appends to the global header_output and exec_output.
def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for regIdx in range(rCount):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : regIdx })
        if readDest:
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx })

    if readDest:
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'
    else:
        loadDestElem = ''
    pieces.append('''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem })

    for regIdx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": ''.join(pieces),
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
903
# Generate an instruction that combines every element of the first source
# with the element of the second source selected by imm, producing
# BigElement results in a destination twice as many registers wide as the
# two-register sources.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   op         - C++ snippet that sets destElem from srcElem1/srcElem2
#   readDest   - also load the destination so op can read destElem
# Appends to the global header_output and exec_output.
def twoRegLongInst(name, Name, types, op, readDest=False):
    global header_output, exec_output
    rCount = 2
    destRegCount = 2 * rCount
    pieces = [simdEnabledCheckCode, '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    ''']
    for regIdx in range(rCount):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
        ''' % { "reg" : regIdx })

    loadDestElem = ''
    if readDest:
        for regIdx in range(destRegCount):
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx })
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    pieces.append('''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem })

    for regIdx in range(destRegCount):
        pieces.append('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": ''.join(pieces),
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
950
# Floating point counterpart of the by-index form: every register of the
# first source is combined with the register of the second source
# selected by imm. The emitted code asserts that imm is below rCount.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - types to emit NeonExecDeclare text for
#   rCount     - number of registers loaded/stored per operand
#   op         - C++ snippet that sets destReg from srcReg1/srcReg2
#   readDest   - also load the destination so op can read destReg
# Appends to the global header_output and exec_output.
def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    ''']
    for regIdx in range(rCount):
        pieces.append('''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : regIdx })
        if readDest:
            pieces.append('''
            destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % { "reg" : regIdx })

    if readDest:
        loadDestReg = 'destReg = destRegs[i];'
    else:
        loadDestReg = ''
    pieces.append('''
    assert(imm >= 0 && imm < rCount);
    for (unsigned i = 0; i < rCount; i++) {
        FloatReg srcReg1 = srcRegs1[i];
        FloatReg srcReg2 = srcRegs2[imm];
        FloatReg destReg;
        %(readDest)s
        %(op)s
        destRegs[i] = destReg;
    }
    ''' % { "op" : op, "readDest" : loadDestReg })

    for regIdx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": ''.join(pieces),
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
995
# Generate a one source, one destination instruction using the
# RegRegImmOp base, with source and destination each spanning rCount
# registers.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   rCount     - number of registers loaded/stored per operand
#   op         - C++ snippet computing the destination value
#   readDest   - load the destination registers before the walk
#   toInt      - op writes a FloatRegBits destReg (stored through
#                destRegs.regs) instead of an Element destElem; destReg
#                is then also pre-loaded regardless of readDest
#   fromInt    - op reads a FloatRegBits srcReg1 (taken from
#                srcRegs1.regs) instead of an Element srcElem1
# Appends to the global header_output and exec_output.
def twoRegShiftInst(name, Name, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    global header_output, exec_output
    pieces = [simdEnabledCheckCode, '''
    RegVect srcRegs1, destRegs;
    ''']
    for regIdx in range(rCount):
        pieces.append('''
        srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : regIdx })
        if readDest:
            pieces.append('''
            destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx })

    # Source side of the per-element walk.
    if fromInt:
        loadSrc = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
    else:
        loadSrc = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'

    # Destination side; the toInt choice takes precedence over readDest.
    if toInt:
        declareDest = 'FloatRegBits destReg;'
        loadDest = 'destReg = gtoh(destRegs.regs[i]);'
        storeDest = 'destRegs.regs[i] = htog(destReg);'
    else:
        declareDest = 'Element destElem;'
        if readDest:
            loadDest = 'destElem = gtoh(destRegs.elements[i]);'
        else:
            loadDest = ''
        storeDest = 'destRegs.elements[i] = htog(destElem);'

    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : loadSrc,
            "declDest" : declareDest,
            "readDest" : loadDest,
            "op" : op,
            "writeDest" : storeDest })

    for regIdx in range(rCount):
        pieces.append('''
        FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(pieces),
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1051
# Generate a narrowing one source instruction using the RegRegImmOp base:
# the source is a four register BigRegVect of BigElement values and the
# destination a two register RegVect of Element values.
#   name, Name - passed to InstObjParams; Name is also the class_name
#                used for the per-type NeonExecDeclare output
#   types      - element types to emit NeonExecDeclare text for
#   op         - C++ snippet that sets destElem from srcElem1
#   readDest   - also load the destination so op can read destElem
# Appends to the global header_output and exec_output.
def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
    global header_output, exec_output
    srcRegCount = 4
    destRegCount = 2
    pieces = [simdEnabledCheckCode, '''
    BigRegVect srcReg1;
    RegVect destReg;
    ''']
    for regIdx in range(srcRegCount):
        pieces.append('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : regIdx })

    loadDestElem = ''
    if readDest:
        for regIdx in range(destRegCount):
            pieces.append('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : regIdx })
        loadDestElem = 'destElem = gtoh(destReg.elements[i]);'

    pieces.append('''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : loadDestElem })

    for regIdx in range(destRegCount):
        pieces.append('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : regIdx })

    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": ''.join(pieces),
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1094
def twoRegLongShiftInst(name, Name, types, op, readDest=False):
    """Emit a "RegRegImmOp" NEON instruction that writes wide elements.

    The generated execute body copies two FpOp1P words into a RegVect,
    then for each of eCount elements exposes the source as `srcElem1`
    (Element), runs the C++ snippet `op`, and stores `destElem`
    (BigElement) into a BigRegVect that is written back to four FpDestP
    words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    op         -- C++ statement(s) computing destElem from srcElem1.
    readDest   -- when true, the four destination words are loaded first
                  and destElem is seeded from the current destination
                  element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element temporary, not the destReg union itself:
        # assigning an element to destReg would not compile and would
        # leave destElem uninitialised for `op`.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit template instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1137
def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
    """Emit a "RegRegOp" NEON instruction with equal-sized operands.

    The generated execute body loads rCount FpOp1P words into srcReg1
    (and, when readDest is true, rCount FpDestP words into destReg), then
    for each element i runs the C++ snippet `op` with `srcElem1` as input
    and `destElem` as output. The result is stored at destReg.elements[j],
    where `j` is a local initialised to `i` before `op` runs, and destReg
    is finally written back to rCount FpDestP words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit register words per operand.
    op         -- C++ statement(s) computing destElem.
    readDest   -- when true, destElem is seeded from the current
                  destination element before `op` runs.
    """
    global header_output, exec_output

    srcLoad = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    '''
    destLoad = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    destStore = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''

    fragments = [simdEnabledCheckCode, '''
    RegVect srcReg1, destReg;
    ''']
    # Source and (optional) destination loads are interleaved per register.
    for idx in range(rCount):
        fragments.append(srcLoad % {"reg": idx})
        if readDest:
            fragments.append(destLoad % {"reg": idx})

    seedDest = ''
    if readDest:
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % {"readDest": seedDest, "op": op})
    for idx in range(rCount):
        fragments.append(destStore % {"reg": idx})
    eWalkCode = ''.join(fragments)

    iop = InstObjParams(name, Name, "RegRegOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1179
def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
    """Emit a "RegRegImmOp" NEON instruction fed by one selected element.

    The generated execute body loads rCount FpOp1P words into srcReg1
    (and, when readDest is true, rCount FpDestP words into destReg). In
    every iteration of the element loop `srcElem1` is read from
    srcReg1.elements[imm], i.e. the same source element chosen by the
    immediate, `op` computes `destElem`, and the result is stored at
    destReg.elements[i]. destReg is then written back to rCount FpDestP
    words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit register words per operand.
    op         -- C++ statement(s) computing destElem from srcElem1.
    readDest   -- when true, destElem is seeded from the current
                  destination element before `op` runs.
    """
    global header_output, exec_output

    def perReg(template, idx):
        # Fill a one-register template with its register index.
        return template % {"reg": idx}

    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        eWalkCode += perReg('''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''', idx)
        if readDest:
            eWalkCode += perReg('''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''', idx)

    if readDest:
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    else:
        seedDest = ''
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"readDest": seedDest, "op": op}
    for idx in range(rCount):
        eWalkCode += perReg('''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''', idx)

    iop = InstObjParams(name, Name, "RegRegImmOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "predicate_test": predicateTest}, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1220
def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
    """Emit a "RegRegOp" NEON instruction that rewrites both operands.

    Unlike the element-walking generators, `op` is appended verbatim (no
    per-element loop is generated around it). The generated execute body
    always loads rCount words of both FpOp1P and FpDestP into srcReg1 and
    destReg, runs `op`, and then writes BOTH vectors back, so the source
    register is updated as well as the destination.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit register words per operand.
    op         -- complete C++ code operating on srcReg1 and destReg.
    readDest   -- accepted for signature compatibility with the other
                  generators; the destination is always loaded here, so
                  the flag has no effect on the generated code.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit template instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1254
def twoRegMiscInstFp(name, Name, types, rCount, op,
                     readDest=False, toInt=False):
    """Emit an "FpRegRegOp" NEON instruction that works register by register.

    The generated execute body copies rCount FpOp1P values into a
    FloatReg array, then for each register index r exposes the source as
    `srcReg1` (FloatReg), runs the C++ snippet `op`, and stores `destReg`
    into destRegs, which is finally written back to rCount FpDestP
    registers.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit registers per operand.
    op         -- C++ statement(s) computing destReg from srcReg1.
    readDest   -- when true, the destination registers are loaded first
                  and destReg is seeded from the current destination.
    toInt      -- when true, the destination is held as raw bits
                  (RegVect / FloatRegBits) and written back through the
                  .uw view; otherwise it is held as FloatReg values.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
        srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                # Read the raw bits through the same .uw view that the
                # write-back below uses.
                eWalkCode += '''
                destRegs.regs[%(reg)d] = FpDestP%(reg)d.uw;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    # The generated loop below iterates with `r`, so the destination must
    # be indexed with `r` too; for toInt, destRegs is a RegVect and has to
    # be accessed through its .regs member.
    readDestCode = ''
    destType = 'FloatReg'
    writeDest = 'destRegs[r] = destReg;'
    if readDest:
        readDestCode = 'destReg = destRegs[r];'
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
        if readDest:
            readDestCode = 'destReg = destRegs.regs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    # One explicit template instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1319
def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
    """Emit a "RegRegOp" NEON instruction that combines adjacent elements.

    The generated execute body loads rCount FpOp1P words into srcRegs
    (and, when readDest is true, rCount FpDestP words into destReg). The
    element loop runs eCount / 2 times; iteration i exposes source
    elements 2*i and 2*i+1 as `srcElem1` and `srcElem2`, runs `op`, and
    stores `destElem` (BigElement) at destReg.elements[i]. destReg is
    then written back to rCount FpDestP words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit register words per operand.
    op         -- C++ statement(s) computing destElem from the pair.
    readDest   -- when true, destElem is seeded from the current
                  destination element before `op` runs.
    """
    global header_output, exec_output

    srcLoad = '''
    srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    '''
    destLoad = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    destStore = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''

    fragments = [simdEnabledCheckCode, '''
    RegVect srcRegs;
    BigRegVect destReg;
    ''']
    for idx in range(rCount):
        fragments.append(srcLoad % {"reg": idx})
        if readDest:
            fragments.append(destLoad % {"reg": idx})

    seedDest = ''
    if readDest:
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"readDest": seedDest, "op": op})
    fragments.extend([destStore % {"reg": idx} for idx in range(rCount)])
    eWalkCode = ''.join(fragments)

    iop = InstObjParams(name, Name, "RegRegOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1362
def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
    """Emit a "RegRegOp" NEON instruction that reads wide elements.

    The generated execute body copies four FpOp1P words into a BigRegVect,
    then for each of eCount elements exposes the source as `srcElem1`
    (BigElement), runs the C++ snippet `op`, and stores `destElem`
    (Element) into a RegVect that is written back to two FpDestP words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    op         -- C++ statement(s) computing destElem from srcElem1.
    readDest   -- when true, the two destination words are loaded first and
                  destElem is seeded from the current destination element.
    """
    global header_output, exec_output

    def emitPerReg(template, count):
        # Expand a one-register template for register indices 0..count-1.
        return ''.join([template % {"reg": idx} for idx in range(count)])

    eWalkCode = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    eWalkCode += emitPerReg('''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    ''', 4)

    seedDest = ''
    if readDest:
        eWalkCode += emitPerReg('''
        destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''', 2)
        seedDest = 'destElem = gtoh(destReg.elements[i]);'

    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"readDest": seedDest, "op": op}
    eWalkCode += emitPerReg('''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    ''', 2)

    iop = InstObjParams(name, Name, "RegRegOp",
                        {"code": eWalkCode,
                         "r_count": 2,
                         "predicate_test": predicateTest}, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1405
def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
    """Emit a "RegImmOp" NEON instruction with no register source.

    The generated execute body runs the C++ snippet `op` once per element
    to produce `destElem`, stores it at destReg.elements[i], and writes
    destReg back to rCount FpDestP words. No source register is read by
    the generated scaffolding.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    rCount     -- number of 32-bit register words in the destination.
    op         -- C++ statement(s) computing destElem.
    readDest   -- when true, the destination words are loaded first and
                  destElem is seeded from the current destination element.
    """
    global header_output, exec_output

    destLoad = '''
    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
    '''
    destStore = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''

    fragments = [simdEnabledCheckCode, '''
    RegVect destReg;
    ''']
    seedDest = ''
    if readDest:
        fragments.extend([destLoad % {"reg": idx} for idx in range(rCount)])
        seedDest = 'destElem = gtoh(destReg.elements[i]);'
    fragments.append('''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"readDest": seedDest, "op": op})
    fragments.extend([destStore % {"reg": idx} for idx in range(rCount)])
    eWalkCode = ''.join(fragments)

    iop = InstObjParams(name, Name, "RegImmOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "predicate_test": predicateTest}, [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1442
def twoRegLongMiscInst(name, Name, types, op, readDest=False):
    """Emit a "RegRegOp" NEON instruction that writes wide elements.

    The generated execute body copies two FpOp1P words into a RegVect,
    then for each of eCount elements exposes the source as `srcElem1`
    (Element), runs the C++ snippet `op`, and stores `destElem`
    (BigElement) into a BigRegVect that is written back to four FpDestP
    words.

    name, Name -- forwarded to InstObjParams.
    types      -- template arguments; one NeonExecDeclare per entry.
    op         -- C++ statement(s) computing destElem from srcElem1.
    readDest   -- when true, the four destination words are loaded first
                  and destElem is seeded from the current destination
                  element.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Seed the per-element temporary, not the destReg union itself:
        # assigning an element to destReg would not compile and would
        # leave destElem uninitialised for `op`.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    # One explicit template instantiation per requested element type.
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1485
# Halving add/subtract definitions. Each *Code string is a C++ snippet that
# computes destElem from srcElem1 and srcElem2; it is handed to
# threeEqualRegInst (defined outside this chunk) twice, once with count 2
# for the "...D" class and once with count 4 for the "...Q" class.
# NOTE(review): the count is presumably the register-word count (rCount)
# used by the generators in this file -- confirm against threeEqualRegInst.
#
# vhadd: both inputs have their low bit cleared and are halved; carryBit is
# 1 only when both low bits were set, and is added back to the sum.
1486 vhaddCode = '''
1487 Element carryBit =
1488 (((unsigned)srcElem1 & 0x1) +
1489 ((unsigned)srcElem2 & 0x1)) >> 1;
1490 // Use division instead of a shift to ensure the sign extension works
1491 // right. The compiler will figure out if it can be a shift. Mask the
1492 // inputs so they get truncated correctly.
1493 destElem = (((srcElem1 & ~(Element)1) / 2) +
1494 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1495 '''
1496 threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
1497 threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1498
# vrhadd: same as vhadd except that 1 is added to the low-bit sum before the
# shift, so carryBit is 1 whenever at least one low bit was set.
1499 vrhaddCode = '''
1500 Element carryBit =
1501 (((unsigned)srcElem1 & 0x1) +
1502 ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1503 // Use division instead of a shift to ensure the sign extension works
1504 // right. The compiler will figure out if it can be a shift. Mask the
1505 // inputs so they get truncated correctly.
1506 destElem = (((srcElem1 & ~(Element)1) / 2) +
1507 ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1508 '''
1509 threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
1510 threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1511
# vhsub: halved inputs are subtracted; barrowBit (sic, a borrow) is 1 only
# when srcElem1's low bit is 0 and srcElem2's low bit is 1, and is
# subtracted from the difference.
1512 vhsubCode = '''
1513 Element barrowBit =
1514 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1515 // Use division instead of a shift to ensure the sign extension works
1516 // right. The compiler will figure out if it can be a shift. Mask the
1517 // inputs so they get truncated correctly.
1518 destElem = (((srcElem1 & ~(Element)1) / 2) -
1519 ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1520 '''
1521 threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
1522 threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1523
# Bitwise instruction definitions. Each *Code string is a C++ snippet that
# computes destElem per element; every instruction is generated twice via
# threeEqualRegInst (defined outside this chunk), with count 2 for the
# "...D" class and count 4 for the "...Q" class, over unsignedTypes.
#
# vand: bitwise AND of the two sources.
1524 vandCode = '''
1525 destElem = srcElem1 & srcElem2;
1526 '''
1527 threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
1528 threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
1529
# vbic: srcElem1 with the bits set in srcElem2 cleared.
1530 vbicCode = '''
1531 destElem = srcElem1 & ~srcElem2;
1532 '''
1533 threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
1534 threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
1535
# vorr: bitwise OR of the two sources.
1536 vorrCode = '''
1537 destElem = srcElem1 | srcElem2;
1538 '''
1539 threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
1540 threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
1541
# vmov is generated from the same OR snippet as vorr, under its own
# mnemonic and class names.
1542 threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
1543 threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
1544
# vorn: srcElem1 OR the complement of srcElem2.
1545 vornCode = '''
1546 destElem = srcElem1 | ~srcElem2;
1547 '''
1548 threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
1549 threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
1550
# veor: bitwise exclusive OR of the two sources.
1551 veorCode = '''
1552 destElem = srcElem1 ^ srcElem2;
1553 '''
1554 threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
1555 threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1556
# The three selects below read destElem as an input, and pass True as a
# sixth argument. NOTE(review): that argument is presumably readDest, as in
# the generators defined in this file -- confirm against threeEqualRegInst.
#
# vbif: keep destination bits where srcElem2 is 1, take srcElem1 bits where
# srcElem2 is 0.
1557 vbifCode = '''
1558 destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1559 '''
1560 threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
1561 threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
# vbit: take srcElem1 bits where srcElem2 is 1, keep destination bits where
# srcElem2 is 0.
1562 vbitCode = '''
1563 destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1564 '''
1565 threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
1566 threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
# vbsl: the destination acts as the mask -- srcElem1 bits where it is 1,
# srcElem2 bits where it is 0.
1567 vbslCode = '''
1568 destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1569 '''
1570 threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
1571 threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1572
# vmax: per-element maximum; srcElem2 is chosen when the inputs are equal.
# Generated over allTypes, so the comparison follows each Element type.
1573 vmaxCode = '''
1574 destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1575 '''
1576 threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1577 threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1578
# vmin: per-element minimum; srcElem2 is chosen when the inputs are equal.
1579 vminCode = '''
1580 destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1581 '''
1582 threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1583 threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1584
# Addition family. threeEqualRegInst, threeRegLongInst, threeRegWideInst and
# threeRegNarrowInst are all defined outside this chunk.
#
# vadd: plain per-element addition (wraps in the Element type).
1585 vaddCode = '''
1586 destElem = srcElem1 + srcElem2;
1587 '''
1588 threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1589 threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1590
# vpadd reuses the vadd snippet but is generated with pairwise=True.
# NOTE(review): how pairwise changes operand selection is decided inside
# threeEqualRegInst, which is not visible here.
1591 threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1592 2, vaddCode, pairwise=True)
1593 threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1594 4, vaddCode, pairwise=True)
# vaddl / vaddw: both sources are cast to BigElement before adding, so the
# sum is formed in the wider type. The same snippet serves both generators.
1595 vaddlwCode = '''
1596 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1597 '''
1598 threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1599 threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
# vaddhn: the wide sum is shifted right by the bit width of Element, i.e.
# the bits above the low sizeof(Element) * 8 are what gets assigned.
1600 vaddhnCode = '''
1601 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1602 (sizeof(Element) * 8);
1603 '''
1604 threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but 1 << (bit width of Element - 1) is added before
# the shift, which rounds the discarded low part.
1605 vraddhnCode = '''
1606 destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1607 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1608 (sizeof(Element) * 8);
1609 '''
1610 threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1611
# Subtraction family, plus the unsigned saturating add. The generator
# functions called here are defined outside this chunk.
#
# vsub: plain per-element subtraction (wraps in the Element type).
1612 vsubCode = '''
1613 destElem = srcElem1 - srcElem2;
1614 '''
1615 threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1616 threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both sources are cast to BigElement before subtracting.
1617 vsublwCode = '''
1618 destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1619 '''
1620 threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1621 threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1622
# vqadd (unsigned): if the wrapped sum is smaller than either input the
# addition overflowed; the result is then forced to all ones and the qc
# bit of the FPSCR copy is set. The FPSCR copy is always written back.
1623 vqaddUCode = '''
1624 destElem = srcElem1 + srcElem2;
1625 FPSCR fpscr = (FPSCR)Fpscr;
1626 if (destElem < srcElem1 || destElem < srcElem2) {
1627 destElem = (Element)(-1);
1628 fpscr.qc = 1;
1629 }
1630 Fpscr = fpscr;
1631 '''
1632 threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1633 threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
# vsubhn: the wide difference is shifted right by the bit width of Element.
1634 vsubhnCode = '''
1635 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1636 (sizeof(Element) * 8);
1637 '''
1638 threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but 1 << (bit width of Element - 1) is added before
# the shift, which rounds the discarded low part.
1639 vrsubhnCode = '''
1640 destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1641 ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1642 (sizeof(Element) * 8);
1643 '''
1644 threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1645
# Saturating arithmetic. Each snippet copies Fpscr into a local FPSCR, sets
# its qc bit when the result had to be saturated, and writes the copy back.
#
# vqadd (signed): overflow is detected when both sources have the same sign
# and the wrapped sum has the other sign. The result is then set to the
# value with only the top bit set, minus one if the wrapped sum was
# negative (i.e. the sources were non-negative).
1646 vqaddSCode = '''
1647 destElem = srcElem1 + srcElem2;
1648 FPSCR fpscr = (FPSCR)Fpscr;
1649 bool negDest = (destElem < 0);
1650 bool negSrc1 = (srcElem1 < 0);
1651 bool negSrc2 = (srcElem2 < 0);
1652 if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1653 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1654 if (negDest)
1655 destElem -= 1;
1656 fpscr.qc = 1;
1657 }
1658 Fpscr = fpscr;
1659 '''
1660 threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1661 threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1662
# vqsub (unsigned): a wrapped difference larger than srcElem1 means the
# subtraction went below zero; the result is then clamped to 0.
1663 vqsubUCode = '''
1664 destElem = srcElem1 - srcElem2;
1665 FPSCR fpscr = (FPSCR)Fpscr;
1666 if (destElem > srcElem1) {
1667 destElem = 0;
1668 fpscr.qc = 1;
1669 }
1670 Fpscr = fpscr;
1671 '''
1672 threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1673 threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1674
# vqsub (signed): overflow is detected when the sources have opposite signs
# (negSrc1 == posSrc2) and the wrapped difference does not share srcElem1's
# sign. Saturation values are chosen the same way as in the signed vqadd.
1675 vqsubSCode = '''
1676 destElem = srcElem1 - srcElem2;
1677 FPSCR fpscr = (FPSCR)Fpscr;
1678 bool negDest = (destElem < 0);
1679 bool negSrc1 = (srcElem1 < 0);
1680 bool posSrc2 = (srcElem2 >= 0);
1681 if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1682 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1683 if (negDest)
1684 destElem -= 1;
1685 fpscr.qc = 1;
1686 }
1687 Fpscr = fpscr;
1688 '''
1689 threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1690 threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1691
# Comparisons. Each snippet yields (Element)(-1), i.e. all bits set, when
# the comparison holds and 0 otherwise.
#
# vcgt: srcElem1 > srcElem2, generated over allTypes.
1692 vcgtCode = '''
1693 destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1694 '''
1695 threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1696 threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1697
# vcge: srcElem1 >= srcElem2, generated over allTypes.
1698 vcgeCode = '''
1699 destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1700 '''
1701 threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1702 threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1703
# vceq: srcElem1 == srcElem2; only unsignedTypes are generated for this one.
1704 vceqCode = '''
1705 destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1706 '''
1707 threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1708 threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1709
1710 vshlCode = '''
1711 int16_t shiftAmt = (int8_t)srcElem2;
1712 if (shiftAmt < 0) {
1713 shiftAmt = -shiftAmt;
1714 if (shiftAmt >= sizeof(Element) * 8) {
1715 shiftAmt = sizeof(Element) * 8 - 1;
1716 destElem = 0;
1717 } else {
1718 destElem = (srcElem1 >> shiftAmt);
1719 }
1720 // Make sure the right shift sign extended when it should.
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41 template <template <typename T> class Base>
42 StaticInstPtr
43 decodeNeonUThreeUReg(unsigned size,
44 ExtMachInst machInst, IntRegIndex dest,
45 IntRegIndex op1, IntRegIndex op2)
46 {
47 switch (size) {
48 case 0:
49 return new Base<uint8_t>(machInst, dest, op1, op2);
50 case 1:
51 return new Base<uint16_t>(machInst, dest, op1, op2);
52 case 2:
53 return new Base<uint32_t>(machInst, dest, op1, op2);
54 case 3:
55 return new Base<uint64_t>(machInst, dest, op1, op2);
56 default:
57 return new Unknown(machInst);
58 }
59 }
60
61 template <template <typename T> class Base>
62 StaticInstPtr
63 decodeNeonSThreeUReg(unsigned size,
64 ExtMachInst machInst, IntRegIndex dest,
65 IntRegIndex op1, IntRegIndex op2)
66 {
67 switch (size) {
68 case 0:
69 return new Base<int8_t>(machInst, dest, op1, op2);
70 case 1:
71 return new Base<int16_t>(machInst, dest, op1, op2);
72 case 2:
73 return new Base<int32_t>(machInst, dest, op1, op2);
74 case 3:
75 return new Base<int64_t>(machInst, dest, op1, op2);
76 default:
77 return new Unknown(machInst);
78 }
79 }
80
81 template <template <typename T> class Base>
82 StaticInstPtr
83 decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84 ExtMachInst machInst, IntRegIndex dest,
85 IntRegIndex op1, IntRegIndex op2)
86 {
87 if (notSigned) {
88 return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89 } else {
90 return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91 }
92 }
93
94 template <template <typename T> class Base>
95 StaticInstPtr
96 decodeNeonUThreeUSReg(unsigned size,
97 ExtMachInst machInst, IntRegIndex dest,
98 IntRegIndex op1, IntRegIndex op2)
99 {
100 switch (size) {
101 case 0:
102 return new Base<uint8_t>(machInst, dest, op1, op2);
103 case 1:
104 return new Base<uint16_t>(machInst, dest, op1, op2);
105 case 2:
106 return new Base<uint32_t>(machInst, dest, op1, op2);
107 default:
108 return new Unknown(machInst);
109 }
110 }
111
112 template <template <typename T> class Base>
113 StaticInstPtr
114 decodeNeonSThreeUSReg(unsigned size,
115 ExtMachInst machInst, IntRegIndex dest,
116 IntRegIndex op1, IntRegIndex op2)
117 {
118 switch (size) {
119 case 0:
120 return new Base<int8_t>(machInst, dest, op1, op2);
121 case 1:
122 return new Base<int16_t>(machInst, dest, op1, op2);
123 case 2:
124 return new Base<int32_t>(machInst, dest, op1, op2);
125 default:
126 return new Unknown(machInst);
127 }
128 }
129
130 template <template <typename T> class Base>
131 StaticInstPtr
132 decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133 ExtMachInst machInst, IntRegIndex dest,
134 IntRegIndex op1, IntRegIndex op2)
135 {
136 if (notSigned) {
137 return decodeNeonUThreeUSReg<Base>(
138 size, machInst, dest, op1, op2);
139 } else {
140 return decodeNeonSThreeUSReg<Base>(
141 size, machInst, dest, op1, op2);
142 }
143 }
144
145 template <template <typename T> class BaseD,
146 template <typename T> class BaseQ>
147 StaticInstPtr
148 decodeNeonUThreeSReg(bool q, unsigned size,
149 ExtMachInst machInst, IntRegIndex dest,
150 IntRegIndex op1, IntRegIndex op2)
151 {
152 if (q) {
153 return decodeNeonUThreeUSReg<BaseQ>(
154 size, machInst, dest, op1, op2);
155 } else {
156 return decodeNeonUThreeUSReg<BaseD>(
157 size, machInst, dest, op1, op2);
158 }
159 }
160
161 template <template <typename T> class BaseD,
162 template <typename T> class BaseQ>
163 StaticInstPtr
164 decodeNeonSThreeSReg(bool q, unsigned size,
165 ExtMachInst machInst, IntRegIndex dest,
166 IntRegIndex op1, IntRegIndex op2)
167 {
168 if (q) {
169 return decodeNeonSThreeUSReg<BaseQ>(
170 size, machInst, dest, op1, op2);
171 } else {
172 return decodeNeonSThreeUSReg<BaseD>(
173 size, machInst, dest, op1, op2);
174 }
175 }
176
177 template <template <typename T> class BaseD,
178 template <typename T> class BaseQ>
179 StaticInstPtr
180 decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181 ExtMachInst machInst, IntRegIndex dest,
182 IntRegIndex op1, IntRegIndex op2)
183 {
184 if (notSigned) {
185 return decodeNeonUThreeSReg<BaseD, BaseQ>(
186 q, size, machInst, dest, op1, op2);
187 } else {
188 return decodeNeonSThreeSReg<BaseD, BaseQ>(
189 q, size, machInst, dest, op1, op2);
190 }
191 }
192
193 template <template <typename T> class BaseD,
194 template <typename T> class BaseQ>
195 StaticInstPtr
196 decodeNeonUThreeReg(bool q, unsigned size,
197 ExtMachInst machInst, IntRegIndex dest,
198 IntRegIndex op1, IntRegIndex op2)
199 {
200 if (q) {
201 return decodeNeonUThreeUReg<BaseQ>(
202 size, machInst, dest, op1, op2);
203 } else {
204 return decodeNeonUThreeUReg<BaseD>(
205 size, machInst, dest, op1, op2);
206 }
207 }
208
209 template <template <typename T> class BaseD,
210 template <typename T> class BaseQ>
211 StaticInstPtr
212 decodeNeonSThreeReg(bool q, unsigned size,
213 ExtMachInst machInst, IntRegIndex dest,
214 IntRegIndex op1, IntRegIndex op2)
215 {
216 if (q) {
217 return decodeNeonSThreeUReg<BaseQ>(
218 size, machInst, dest, op1, op2);
219 } else {
220 return decodeNeonSThreeUReg<BaseD>(
221 size, machInst, dest, op1, op2);
222 }
223 }
224
225 template <template <typename T> class BaseD,
226 template <typename T> class BaseQ>
227 StaticInstPtr
228 decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229 ExtMachInst machInst, IntRegIndex dest,
230 IntRegIndex op1, IntRegIndex op2)
231 {
232 if (notSigned) {
233 return decodeNeonUThreeReg<BaseD, BaseQ>(
234 q, size, machInst, dest, op1, op2);
235 } else {
236 return decodeNeonSThreeReg<BaseD, BaseQ>(
237 q, size, machInst, dest, op1, op2);
238 }
239 }
240
241 template <template <typename T> class BaseD,
242 template <typename T> class BaseQ>
243 StaticInstPtr
244 decodeNeonUTwoShiftReg(bool q, unsigned size,
245 ExtMachInst machInst, IntRegIndex dest,
246 IntRegIndex op1, uint64_t imm)
247 {
248 if (q) {
249 switch (size) {
250 case 0:
251 return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252 case 1:
253 return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254 case 2:
255 return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256 case 3:
257 return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258 default:
259 return new Unknown(machInst);
260 }
261 } else {
262 switch (size) {
263 case 0:
264 return new BaseD<uint8_t>(machInst, dest, op1, imm);
265 case 1:
266 return new BaseD<uint16_t>(machInst, dest, op1, imm);
267 case 2:
268 return new BaseD<uint32_t>(machInst, dest, op1, imm);
269 case 3:
270 return new BaseD<uint64_t>(machInst, dest, op1, imm);
271 default:
272 return new Unknown(machInst);
273 }
274 }
275 }
276
277 template <template <typename T> class BaseD,
278 template <typename T> class BaseQ>
279 StaticInstPtr
280 decodeNeonSTwoShiftReg(bool q, unsigned size,
281 ExtMachInst machInst, IntRegIndex dest,
282 IntRegIndex op1, uint64_t imm)
283 {
284 if (q) {
285 switch (size) {
286 case 0:
287 return new BaseQ<int8_t>(machInst, dest, op1, imm);
288 case 1:
289 return new BaseQ<int16_t>(machInst, dest, op1, imm);
290 case 2:
291 return new BaseQ<int32_t>(machInst, dest, op1, imm);
292 case 3:
293 return new BaseQ<int64_t>(machInst, dest, op1, imm);
294 default:
295 return new Unknown(machInst);
296 }
297 } else {
298 switch (size) {
299 case 0:
300 return new BaseD<int8_t>(machInst, dest, op1, imm);
301 case 1:
302 return new BaseD<int16_t>(machInst, dest, op1, imm);
303 case 2:
304 return new BaseD<int32_t>(machInst, dest, op1, imm);
305 case 3:
306 return new BaseD<int64_t>(machInst, dest, op1, imm);
307 default:
308 return new Unknown(machInst);
309 }
310 }
311 }
312
313
314 template <template <typename T> class BaseD,
315 template <typename T> class BaseQ>
316 StaticInstPtr
317 decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318 ExtMachInst machInst, IntRegIndex dest,
319 IntRegIndex op1, uint64_t imm)
320 {
321 if (notSigned) {
322 return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323 q, size, machInst, dest, op1, imm);
324 } else {
325 return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326 q, size, machInst, dest, op1, imm);
327 }
328 }
329
330 template <template <typename T> class Base>
331 StaticInstPtr
332 decodeNeonUTwoShiftUSReg(unsigned size,
333 ExtMachInst machInst, IntRegIndex dest,
334 IntRegIndex op1, uint64_t imm)
335 {
336 switch (size) {
337 case 0:
338 return new Base<uint8_t>(machInst, dest, op1, imm);
339 case 1:
340 return new Base<uint16_t>(machInst, dest, op1, imm);
341 case 2:
342 return new Base<uint32_t>(machInst, dest, op1, imm);
343 default:
344 return new Unknown(machInst);
345 }
346 }
347
348 template <template <typename T> class BaseD,
349 template <typename T> class BaseQ>
350 StaticInstPtr
351 decodeNeonUTwoShiftSReg(bool q, unsigned size,
352 ExtMachInst machInst, IntRegIndex dest,
353 IntRegIndex op1, uint64_t imm)
354 {
355 if (q) {
356 return decodeNeonUTwoShiftUSReg<BaseQ>(
357 size, machInst, dest, op1, imm);
358 } else {
359 return decodeNeonUTwoShiftUSReg<BaseD>(
360 size, machInst, dest, op1, imm);
361 }
362 }
363
364 template <template <typename T> class Base>
365 StaticInstPtr
366 decodeNeonSTwoShiftUSReg(unsigned size,
367 ExtMachInst machInst, IntRegIndex dest,
368 IntRegIndex op1, uint64_t imm)
369 {
370 switch (size) {
371 case 0:
372 return new Base<int8_t>(machInst, dest, op1, imm);
373 case 1:
374 return new Base<int16_t>(machInst, dest, op1, imm);
375 case 2:
376 return new Base<int32_t>(machInst, dest, op1, imm);
377 default:
378 return new Unknown(machInst);
379 }
380 }
381
382 template <template <typename T> class BaseD,
383 template <typename T> class BaseQ>
384 StaticInstPtr
385 decodeNeonSTwoShiftSReg(bool q, unsigned size,
386 ExtMachInst machInst, IntRegIndex dest,
387 IntRegIndex op1, uint64_t imm)
388 {
389 if (q) {
390 return decodeNeonSTwoShiftUSReg<BaseQ>(
391 size, machInst, dest, op1, imm);
392 } else {
393 return decodeNeonSTwoShiftUSReg<BaseD>(
394 size, machInst, dest, op1, imm);
395 }
396 }
397
398 template <template <typename T> class BaseD,
399 template <typename T> class BaseQ>
400 StaticInstPtr
401 decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402 ExtMachInst machInst, IntRegIndex dest,
403 IntRegIndex op1, uint64_t imm)
404 {
405 if (notSigned) {
406 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407 q, size, machInst, dest, op1, imm);
408 } else {
409 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410 q, size, machInst, dest, op1, imm);
411 }
412 }
413
414 template <template <typename T> class Base>
415 StaticInstPtr
416 decodeNeonUTwoMiscUSReg(unsigned size,
417 ExtMachInst machInst, IntRegIndex dest,
418 IntRegIndex op1)
419 {
420 switch (size) {
421 case 0:
422 return new Base<uint8_t>(machInst, dest, op1);
423 case 1:
424 return new Base<uint16_t>(machInst, dest, op1);
425 case 2:
426 return new Base<uint32_t>(machInst, dest, op1);
427 default:
428 return new Unknown(machInst);
429 }
430 }
431
432 template <template <typename T> class Base>
433 StaticInstPtr
434 decodeNeonSTwoMiscUSReg(unsigned size,
435 ExtMachInst machInst, IntRegIndex dest,
436 IntRegIndex op1)
437 {
438 switch (size) {
439 case 0:
440 return new Base<int8_t>(machInst, dest, op1);
441 case 1:
442 return new Base<int16_t>(machInst, dest, op1);
443 case 2:
444 return new Base<int32_t>(machInst, dest, op1);
445 default:
446 return new Unknown(machInst);
447 }
448 }
449
450 template <template <typename T> class BaseD,
451 template <typename T> class BaseQ>
452 StaticInstPtr
453 decodeNeonUTwoMiscSReg(bool q, unsigned size,
454 ExtMachInst machInst, IntRegIndex dest,
455 IntRegIndex op1)
456 {
457 if (q) {
458 return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459 } else {
460 return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461 }
462 }
463
464 template <template <typename T> class BaseD,
465 template <typename T> class BaseQ>
466 StaticInstPtr
467 decodeNeonSTwoMiscSReg(bool q, unsigned size,
468 ExtMachInst machInst, IntRegIndex dest,
469 IntRegIndex op1)
470 {
471 if (q) {
472 return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473 } else {
474 return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475 }
476 }
477
478 template <template <typename T> class Base>
479 StaticInstPtr
480 decodeNeonUTwoMiscUReg(unsigned size,
481 ExtMachInst machInst, IntRegIndex dest,
482 IntRegIndex op1)
483 {
484 switch (size) {
485 case 0:
486 return new Base<uint8_t>(machInst, dest, op1);
487 case 1:
488 return new Base<uint16_t>(machInst, dest, op1);
489 case 2:
490 return new Base<uint32_t>(machInst, dest, op1);
491 case 3:
492 return new Base<uint64_t>(machInst, dest, op1);
493 default:
494 return new Unknown(machInst);
495 }
496 }
497
498 template <template <typename T> class Base>
499 StaticInstPtr
500 decodeNeonSTwoMiscUReg(unsigned size,
501 ExtMachInst machInst, IntRegIndex dest,
502 IntRegIndex op1)
503 {
504 switch (size) {
505 case 0:
506 return new Base<int8_t>(machInst, dest, op1);
507 case 1:
508 return new Base<int16_t>(machInst, dest, op1);
509 case 2:
510 return new Base<int32_t>(machInst, dest, op1);
511 case 3:
512 return new Base<int64_t>(machInst, dest, op1);
513 default:
514 return new Unknown(machInst);
515 }
516 }
517
518 template <template <typename T> class BaseD,
519 template <typename T> class BaseQ>
520 StaticInstPtr
521 decodeNeonSTwoMiscReg(bool q, unsigned size,
522 ExtMachInst machInst, IntRegIndex dest,
523 IntRegIndex op1)
524 {
525 if (q) {
526 return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527 } else {
528 return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529 }
530 }
531
532 template <template <typename T> class BaseD,
533 template <typename T> class BaseQ>
534 StaticInstPtr
535 decodeNeonUTwoMiscReg(bool q, unsigned size,
536 ExtMachInst machInst, IntRegIndex dest,
537 IntRegIndex op1)
538 {
539 if (q) {
540 return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541 } else {
542 return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543 }
544 }
545
546 template <template <typename T> class BaseD,
547 template <typename T> class BaseQ>
548 StaticInstPtr
549 decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550 ExtMachInst machInst, IntRegIndex dest,
551 IntRegIndex op1)
552 {
553 if (notSigned) {
554 return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555 q, size, machInst, dest, op1);
556 } else {
557 return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558 q, size, machInst, dest, op1);
559 }
560 }
561
562}};
563
564output exec {{
565 static float
566 vcgtFunc(float op1, float op2)
567 {
568 if (isSnan(op1) || isSnan(op2))
569 return 2.0;
570 return (op1 > op2) ? 0.0 : 1.0;
571 }
572
573 static float
574 vcgeFunc(float op1, float op2)
575 {
576 if (isSnan(op1) || isSnan(op2))
577 return 2.0;
578 return (op1 >= op2) ? 0.0 : 1.0;
579 }
580
581 static float
582 vceqFunc(float op1, float op2)
583 {
584 if (isSnan(op1) || isSnan(op2))
585 return 2.0;
586 return (op1 == op2) ? 0.0 : 1.0;
587 }
588
589 static float
590 vcleFunc(float op1, float op2)
591 {
592 if (isSnan(op1) || isSnan(op2))
593 return 2.0;
594 return (op1 <= op2) ? 0.0 : 1.0;
595 }
596
597 static float
598 vcltFunc(float op1, float op2)
599 {
600 if (isSnan(op1) || isSnan(op2))
601 return 2.0;
602 return (op1 < op2) ? 0.0 : 1.0;
603 }
604
605 static float
606 vacgtFunc(float op1, float op2)
607 {
608 if (isSnan(op1) || isSnan(op2))
609 return 2.0;
610 return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611 }
612
613 static float
614 vacgeFunc(float op1, float op2)
615 {
616 if (isSnan(op1) || isSnan(op2))
617 return 2.0;
618 return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619 }
620}};
621
622let {{
# C++ prologue placed at the start of every generated NEON execute body:
# it returns disabledFault() unless neonEnabled(Cpacr, Cpsr, Fpexc) holds.
simdEnabledCheckCode = (
    "\n"
    "    if (!neonEnabled(Cpacr, Cpsr, Fpexc))\n"
    "        return disabledFault();\n"
)
627}};
628
629let {{
630
# Accumulators that the generator functions below append their output to.
header_output = ""
exec_output = ""

# Element type names, grouped by signedness. The "small" groups stop at
# 32 bits; the full groups add the 64-bit type.
smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
smallSignedTypes = ("int8_t", "int16_t", "int32_t")
unsignedTypes = smallUnsignedTypes + ("uint64_t",)
signedTypes = smallSignedTypes + ("int64_t",)

# Combined groups: unsigned names first, then signed.
smallTypes = smallUnsignedTypes + smallSignedTypes
allTypes = unsignedTypes + signedTypes
640
def threeEqualRegInst(name, Name, types, rCount, op,
                      readDest=False, pairwise=False):
    """Generate a three-operand NEON instruction whose operands all span
    rCount registers.

    The generated body loads op1 and op2 (and the destination when
    readDest is set) into RegVect unions, runs the C++ snippet op once
    per element with srcElem1, srcElem2 and destElem in scope, and
    writes the destination registers back. With pairwise set, srcElem1
    and srcElem2 are adjacent elements taken from op1 for the first half
    of the results and from op2 for the second half.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs
    if readDest:
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destRead = ''
    loopArgs = { "op" : op, "readDest" : destRead }
    if not pairwise:
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(srcReg1.elements[i]);
            Element srcElem2 = gtoh(srcReg2.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % loopArgs
    else:
        walk += '''
        for (unsigned i = 0; i < eCount; i++) {
            Element srcElem1 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i] :
                                    srcReg2.elements[2 * i - eCount]);
            Element srcElem2 = gtoh(2 * i < eCount ?
                                    srcReg1.elements[2 * i + 1] :
                                    srcReg2.elements[2 * i + 1 - eCount]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % loopArgs
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
700
def threeEqualRegInstFp(name, Name, types, rCount, op,
                        readDest=False, pairwise=False, toInt=False):
    """Generate a three-operand floating point NEON instruction whose
    operands all span rCount registers.

    The generated body copies op1 and op2 into FloatReg arrays, runs the
    C++ snippet op once per register with srcReg1, srcReg2 and destReg
    in scope, and writes the destination registers back. With pairwise
    set, srcReg1 and srcReg2 are adjacent registers taken from op1 for
    the first half of the results and from op2 for the second half.

    With toInt set the destination is a RegVect of FloatRegBits values
    rather than a FloatReg array, so every destination access goes
    through its regs member.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    else:
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        if toInt:
            # destRegs is a RegVect here, so it cannot be indexed
            # directly; read through regs like the write-back does.
            readDestCode = 'destReg = destRegs.regs[r];'
        else:
            readDestCode = 'destReg = destRegs[r];'
    if pairwise:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = (2 * r < rCount) ?
                srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
            FloatReg srcReg2 = (2 * r < rCount) ?
                srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    else:
        eWalkCode += '''
        for (unsigned r = 0; r < rCount; r++) {
            FloatReg srcReg1 = srcRegs1[r];
            FloatReg srcReg2 = srcRegs2[r];
            %(destType)s destReg;
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "op" : op,
                "readDest" : readDestCode,
                "destType" : destType,
                "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
784
def threeUnequalRegInst(name, Name, types, op,
                        bigSrc1, bigSrc2, bigDest, readDest):
    """Generate a three-operand NEON instruction whose operands may have
    different widths.

    Each of bigSrc1, bigSrc2 and bigDest selects, for its operand,
    either a RegVect of Element values spanning 2 registers (False) or a
    BigRegVect of BigElement values spanning 4 registers (True). The
    generated body loads the sources (and the destination when readDest
    is set), runs the C++ snippet op once per element with srcElem1,
    srcElem2 and destElem in scope, and writes the destination back.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    src1Cnt = src2Cnt = destCnt = 2
    src1Prefix = src2Prefix = destPrefix = ''
    if bigSrc1:
        src1Cnt = 4
        src1Prefix = 'Big'
    if bigSrc2:
        src2Cnt = 4
        src2Prefix = 'Big'
    if bigDest:
        destCnt = 4
        destPrefix = 'Big'
    eWalkCode = simdEnabledCheckCode + '''
        %sRegVect srcReg1;
        %sRegVect srcReg2;
        %sRegVect destReg;
    ''' % (src1Prefix, src2Prefix, destPrefix)
    for reg in range(src1Cnt):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    for reg in range(src2Cnt):
        eWalkCode += '''
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(destCnt):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    # srcElem2 is declared with the second operand's own prefix so that
    # its type matches the vector it is read from.
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
        %(src2Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
        %(destPrefix)sElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode,
            "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
            "destPrefix" : destPrefix }
    for reg in range(destCnt):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
847
def threeRegNarrowInst(name, Name, types, op, readDest=False):
    """Generate a three-operand instruction with both sources big and a
    regular-sized destination, via threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=True, bigDest=False,
                        readDest=readDest)
851
def threeRegLongInst(name, Name, types, op, readDest=False):
    """Generate a three-operand instruction with regular-sized sources
    and a big destination, via threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=False, bigSrc2=False, bigDest=True,
                        readDest=readDest)
855
def threeRegWideInst(name, Name, types, op, readDest=False):
    """Generate a three-operand instruction with a big first source, a
    regular-sized second source and a big destination, via
    threeUnequalRegInst."""
    threeUnequalRegInst(name, Name, types, op,
                        bigSrc1=True, bigSrc2=False, bigDest=True,
                        readDest=readDest)
859
def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
    """Generate a NEON instruction that combines every element of op1
    with one fixed element of op2.

    The generated body loads op1 and op2 (and the destination when
    readDest is set) across rCount registers, asserts that imm is a
    valid element index, and runs the C++ snippet op once per element
    with srcElem1 taken from position i of op1 and srcElem2 taken from
    position imm of op2.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2, destReg;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs
    if readDest:
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destRead = ''
    walk += '''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
903
def twoRegLongInst(name, Name, types, op, readDest=False):
    """Generate a NEON instruction that combines every element of op1
    with one fixed element of op2 and produces double-width results.

    The sources are RegVects spanning 2 registers; the destination is a
    BigRegVect spanning 4. The generated body asserts that imm is a
    valid element index and runs the C++ snippet op once per element
    with srcElem1 taken from position i of op1, srcElem2 taken from
    position imm of op2, and a BigElement destElem.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    rCount = 2
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, srcReg2;
    BigRegVect destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        # The destination is twice as wide as the sources.
        for reg in range(2 * rCount):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    assert(imm >= 0 && imm < eCount);
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element srcElem2 = gtoh(srcReg2.elements[imm]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(2 * rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
950
def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
    """Generate a floating point NEON instruction that combines every
    register of op1 with one fixed register of op2.

    The generated body copies op1 and op2 (and the destination when
    readDest is set) into FloatReg arrays of rCount entries, asserts
    that imm is a valid index, and runs the C++ snippet op once per
    register with srcReg1 taken from position i of op1 and srcReg2
    taken from position imm of op2.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1, srcRegs2, destRegs;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
            srcRegs2[%(reg)d] = FpOp2P%(reg)d;
        ''' % regArgs
        if readDest:
            walk += '''
                destRegs[%(reg)d] = FpDestP%(reg)d;
            ''' % regArgs
    if readDest:
        destRead = 'destReg = destRegs[i];'
    else:
        destRead = ''
    walk += '''
    assert(imm >= 0 && imm < rCount);
    for (unsigned i = 0; i < rCount; i++) {
        FloatReg srcReg1 = srcRegs1[i];
        FloatReg srcReg2 = srcRegs2[imm];
        FloatReg destReg;
        %(readDest)s
        %(op)s
        destRegs[i] = destReg;
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d = destRegs[%(reg)d];
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "FpRegRegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
995
def twoRegShiftInst(name, Name, types, rCount, op,
                    readDest=False, toInt=False, fromInt=False):
    """Generate a two-register NEON instruction that takes an immediate,
    with both operands spanning rCount registers.

    By default the C++ snippet op sees an Element srcElem1 and an
    Element destElem. With fromInt set the source is instead presented
    as a FloatRegBits srcReg1 read from regs[i]; with toInt set the
    destination is a FloatRegBits destReg stored to regs[i].

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs1, destRegs;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs

    # Source access snippet.
    srcRead = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
    if fromInt:
        srcRead = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'

    # Destination declaration, optional pre-read and write-back snippets.
    destDecl = 'Element destElem;'
    destWrite = 'destRegs.elements[i] = htog(destElem);'
    destRead = ''
    if readDest:
        destRead = 'destElem = gtoh(destRegs.elements[i]);'
    if toInt:
        # NOTE(review): this replaces the pre-read even when readDest is
        # False - confirm that is intended.
        destRead = 'destReg = gtoh(destRegs.regs[i]);'
        destDecl = 'FloatRegBits destReg;'
        destWrite = 'destRegs.regs[i] = htog(destReg);'

    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        %(readOp)s
        %(declDest)s
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "readOp" : srcRead,
            "declDest" : destDecl,
            "readDest" : destRead,
            "op" : op,
            "writeDest" : destWrite }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1051
def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
    """Generate a two-register NEON instruction with an immediate that
    narrows its source.

    The source is a BigRegVect spanning 4 registers and the destination
    a RegVect spanning 2. The C++ snippet op runs once per element with
    a BigElement srcElem1 and an Element destElem in scope.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    srcRegCount = 4
    destRegCount = 2
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcRegCount):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : idx }
    destRead = ''
    if readDest:
        for idx in range(destRegCount):
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : idx }
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(destRegCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1094
def twoRegLongShiftInst(name, Name, types, op, readDest=False):
    """Generate a two-register NEON instruction with an immediate that
    widens its source.

    The source is a RegVect spanning 2 registers and the destination a
    BigRegVect spanning 4. The C++ snippet op runs once per element with
    an Element srcElem1 and a BigElement destElem in scope; when
    readDest is set destElem is preloaded from the destination.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Preload the per-element variable, not the whole destReg vector.
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1137
def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction without an immediate,
    with both operands spanning rCount registers.

    The C++ snippet op runs once per element with srcElem1, destElem and
    an index j in scope. j starts out equal to the loop index i and is
    the position destElem is stored to.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs
    if readDest:
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destRead = ''
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        unsigned j = i;
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[j] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1179
def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction whose source is one
    fixed element.

    The C++ snippet op runs once per destination element, and on every
    iteration srcElem1 is the element of op1 at position imm.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs
    if readDest:
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destRead = ''
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[imm]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegImmOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1220
def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction that may rewrite both of
    its registers.

    Unlike the per-element generators, the C++ snippet op is inserted
    verbatim (not wrapped in an element loop) and operates directly on
    srcReg1 and destReg. Both vectors are always loaded before op and
    both are written back afterwards.

    readDest is accepted for signature compatibility with the other
    generators but has no effect: the destination is read regardless.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1, destReg;
    '''
    for reg in range(rCount):
        eWalkCode += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
        ''' % { "reg" : reg }
    eWalkCode += op
    for reg in range(rCount):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1254
def twoRegMiscInstFp(name, Name, types, rCount, op,
                     readDest=False, toInt=False):
    """Generate a two-register floating point NEON instruction without
    an immediate, with both operands spanning rCount registers.

    The generated body copies op1 into a FloatReg array and runs the C++
    snippet op once per register with srcReg1 and destReg in scope. With
    toInt set the destination is a RegVect of FloatRegBits values rather
    than a FloatReg array, so every destination access goes through its
    regs member. When readDest is set destReg is preloaded from the
    destination.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    typedef FloatReg FloatVect[rCount];
    FloatVect srcRegs1;
    '''
    if toInt:
        eWalkCode += 'RegVect destRegs;\n'
    else:
        eWalkCode += 'FloatVect destRegs;\n'
    for reg in range(rCount):
        eWalkCode += '''
            srcRegs1[%(reg)d] = FpOp1P%(reg)d;
        ''' % { "reg" : reg }
        if readDest:
            if toInt:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                    destRegs[%(reg)d] = FpDestP%(reg)d;
                ''' % { "reg" : reg }
    if toInt:
        destType = 'FloatRegBits'
        writeDest = 'destRegs.regs[r] = destReg;'
    else:
        destType = 'FloatReg'
        writeDest = 'destRegs[r] = destReg;'
    readDestCode = ''
    if readDest:
        # The generated loop counter is r, and a RegVect destination has
        # to be read through regs just as it is written.
        if toInt:
            readDestCode = 'destReg = destRegs.regs[r];'
        else:
            readDestCode = 'destReg = destRegs[r];'
    eWalkCode += '''
    for (unsigned r = 0; r < rCount; r++) {
        FloatReg srcReg1 = srcRegs1[r];
        %(destType)s destReg;
        %(readDest)s
        %(op)s
        %(writeDest)s
    }
    ''' % { "op" : op,
            "readDest" : readDestCode,
            "destType" : destType,
            "writeDest" : writeDest }
    for reg in range(rCount):
        if toInt:
            eWalkCode += '''
            FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
            ''' % { "reg" : reg }
        else:
            eWalkCode += '''
            FpDestP%(reg)d = destRegs[%(reg)d];
            ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "FpRegRegOp",
                        { "code": eWalkCode,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1319
def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
    """Generate a two-register NEON instruction that folds adjacent
    source elements into one wider destination element.

    The source is a RegVect and the destination a BigRegVect, both
    loaded and stored across rCount registers. The C++ snippet op runs
    eCount / 2 times with srcElem1 and srcElem2 taken from positions
    2 * i and 2 * i + 1 of the source and a BigElement destElem stored
    to position i.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    walk = simdEnabledCheckCode + '''
    RegVect srcRegs;
    BigRegVect destReg;
    '''
    for idx in range(rCount):
        regArgs = { "reg" : idx }
        walk += '''
            srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % regArgs
        if readDest:
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % regArgs
    if readDest:
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    else:
        destRead = ''
    walk += '''
    for (unsigned i = 0; i < eCount / 2; i++) {
        Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
        Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(rCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": walk,
                          "r_count": rCount,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1362
def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
    """Generate a two-register NEON instruction without an immediate
    that narrows its source.

    The source is a BigRegVect spanning 4 registers and the destination
    a RegVect spanning 2. The C++ snippet op runs once per element with
    a BigElement srcElem1 and an Element destElem in scope.

    The declaration goes to header_output, the execute body to
    exec_output, followed by one NeonExecDeclare per entry in types.
    """
    global header_output, exec_output
    srcRegCount = 4
    destRegCount = 2
    walk = simdEnabledCheckCode + '''
    BigRegVect srcReg1;
    RegVect destReg;
    '''
    for idx in range(srcRegCount):
        walk += '''
            srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : idx }
    destRead = ''
    if readDest:
        for idx in range(destRegCount):
            walk += '''
                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : idx }
        destRead = 'destElem = gtoh(destReg.elements[i]);'
    walk += '''
    for (unsigned i = 0; i < eCount; i++) {
        BigElement srcElem1 = gtoh(srcReg1.elements[i]);
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : destRead }
    for idx in range(destRegCount):
        walk += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : idx }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": walk,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
1405
def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
    """Generate a NEON one-register-plus-immediate instruction.

    The emitted C++ runs `op` once per destination element and writes
    rCount FpDestP<n> pieces back.  No source register is read.

    name     -- mnemonic handed to InstObjParams
    Name     -- class name handed to InstObjParams and NeonExecDeclare
    types    -- iterable of template arguments; one NeonExecDeclare
                substitution is appended to exec_output for each
    rCount   -- number of FpDestP<n> register pieces
    op       -- C++ snippet spliced into the element loop; it is
                expected to assign destElem
    readDest -- when true, the destination pieces are loaded first and
                destElem is preloaded from them before `op`

    Appends to the module level header_output and exec_output.
    """
    global header_output, exec_output

    pieces = range(rCount)
    eWalkCode = simdEnabledCheckCode + '''
    RegVect destReg;
    '''
    preload = ''
    if readDest:
        preload = 'destElem = gtoh(destReg.elements[i]);'
        for idx in pieces:
            eWalkCode += '''
            destReg.regs[%d] = htog(FpDestP%d.uw);
            ''' % (idx, idx)
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % {"op": op, "readDest": preload}
    for idx in pieces:
        eWalkCode += '''
        FpDestP%d.uw = gtoh(destReg.regs[%d]);
        ''' % (idx, idx)

    iop = InstObjParams(name, Name, "RegImmOp",
                        {"code": eWalkCode,
                         "r_count": rCount,
                         "predicate_test": predicateTest},
                        [])
    header_output += NeonRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({"targs": elemType,
                                              "class_name": Name})
1442
def twoRegLongMiscInst(name, Name, types, op, readDest=False):
    """Generate a NEON two-register lengthening instruction.

    The emitted C++ reads two FpOp1P<n> pieces into a RegVect, runs
    `op` once per element (Element srcElem1 in, BigElement destElem
    out) and writes four FpDestP<n> pieces back.

    name     -- mnemonic handed to InstObjParams
    Name     -- class name handed to InstObjParams and NeonExecDeclare
    types    -- iterable of template arguments; one NeonExecDeclare
                substitution is appended to exec_output for each
    op       -- C++ snippet spliced into the element loop
    readDest -- when true, the four destination pieces are loaded first
                and destElem is preloaded from them before `op`

    Appends to the module level header_output and exec_output.
    """
    global header_output, exec_output
    eWalkCode = simdEnabledCheckCode + '''
    RegVect srcReg1;
    BigRegVect destReg;
    '''
    for reg in range(2):
        eWalkCode += '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        ''' % { "reg" : reg }
    if readDest:
        for reg in range(4):
            eWalkCode += '''
            destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
            ''' % { "reg" : reg }
    readDestCode = ''
    if readDest:
        # Preload the element, not the register vector: the snippet
        # used to assign to destReg, which left destElem uninitialised
        # for `op` (the sibling generators all assign destElem here).
        readDestCode = 'destElem = gtoh(destReg.elements[i]);'
    eWalkCode += '''
    for (unsigned i = 0; i < eCount; i++) {
        Element srcElem1 = gtoh(srcReg1.elements[i]);
        BigElement destElem;
        %(readDest)s
        %(op)s
        destReg.elements[i] = htog(destElem);
    }
    ''' % { "op" : op, "readDest" : readDestCode }
    for reg in range(4):
        eWalkCode += '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        ''' % { "reg" : reg }
    iop = InstObjParams(name, Name,
                        "RegRegOp",
                        { "code": eWalkCode,
                          "r_count": 2,
                          "predicate_test": predicateTest }, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonUnequalRegExecute.subst(iop)
    for type in types:
        substDict = { "targs" : type,
                      "class_name" : Name }
        exec_output += NeonExecDeclare.subst(substDict)
1485
# Halving add: each operand is halved after its low bit is masked off,
# and the carry produced by the two low bits is added back in.
vhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1)) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
# One class per register width: "D" moves 2 pieces, "Q" moves 4.
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhadd", "Vhadd" + suffix, allTypes, regs, vhaddCode)

# Rounding halving add: same as above with an extra 1 folded into the
# low-bit carry computation.
vrhaddCode = '''
    Element carryBit =
        (((unsigned)srcElem1 & 0x1) +
         ((unsigned)srcElem2 & 0x1) + 1) >> 1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) +
                ((srcElem2 & ~(Element)1) / 2)) + carryBit;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vrhadd", "Vrhadd" + suffix, allTypes, regs,
                      vrhaddCode)

# Halving subtract: the low bits contribute a borrow instead of a carry.
vhsubCode = '''
    Element barrowBit =
        (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
    // Use division instead of a shift to ensure the sign extension works
    // right. The compiler will figure out if it can be a shift. Mask the
    // inputs so they get truncated correctly.
    destElem = (((srcElem1 & ~(Element)1) / 2) -
                ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vhsub", "Vhsub" + suffix, allTypes, regs, vhsubCode)
1523
# Element-wise bitwise operations.  Each one is instantiated for the
# "D" (2 register pieces) and "Q" (4 register pieces) forms, in that
# order.
vandCode = '''
    destElem = srcElem1 & srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vand", "Vand" + suffix, unsignedTypes, regs, vandCode)

vbicCode = '''
    destElem = srcElem1 & ~srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbic", "Vbic" + suffix, unsignedTypes, regs, vbicCode)

vorrCode = '''
    destElem = srcElem1 | srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorr", "Vorr" + suffix, unsignedTypes, regs, vorrCode)

# vmov reuses the OR snippet under a different mnemonic and class name.
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmov", "Vmov" + suffix, unsignedTypes, regs, vorrCode)

vornCode = '''
    destElem = srcElem1 | ~srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vorn", "Vorn" + suffix, unsignedTypes, regs, vornCode)

veorCode = '''
    destElem = srcElem1 ^ srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("veor", "Veor" + suffix, unsignedTypes, regs, veorCode)

# The three bit-select forms read the old destination value, so they
# pass True as the trailing argument.
vbifCode = '''
    destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbif", "Vbif" + suffix, unsignedTypes, regs,
                      vbifCode, True)
vbitCode = '''
    destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbit", "Vbit" + suffix, unsignedTypes, regs,
                      vbitCode, True)
vbslCode = '''
    destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vbsl", "Vbsl" + suffix, unsignedTypes, regs,
                      vbslCode, True)
1572
# Element-wise maximum; instantiated for the D (2 pieces) and Q
# (4 pieces) forms.
vmaxCode = '''
    destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmax", "Vmax" + suffix, allTypes, regs, vmaxCode)

# Element-wise minimum.
vminCode = '''
    destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vmin", "Vmin" + suffix, allTypes, regs, vminCode)
1584
# Plain element-wise add, also reused for the pairwise vpadd forms.
vaddCode = '''
    destElem = srcElem1 + srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vadd", "NVadd" + suffix, unsignedTypes, regs,
                      vaddCode)

for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vpadd", "NVpadd" + suffix, unsignedTypes,
                      regs, vaddCode, pairwise=True)

# Long / wide add: both operands are widened to BigElement first.
vaddlwCode = '''
    destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
'''
threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)

# Add and keep the high half of the wide sum.
vaddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)

# As vaddhn, with a rounding constant (half of the discarded range)
# added before the shift.
vraddhnCode = '''
    destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)

# Plain element-wise subtract.
vsubCode = '''
    destElem = srcElem1 - srcElem2;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vsub", "NVsub" + suffix, unsignedTypes, regs,
                      vsubCode)

# Long / wide subtract on widened operands.
vsublwCode = '''
    destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
'''
threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1622
# Unsigned saturating add: a wrapped sum is smaller than an operand, in
# which case the result is clamped to all ones and FPSCR.QC is set.
vqaddUCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem < srcElem1 || destElem < srcElem2) {
        destElem = (Element)(-1);
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddU" + suffix, unsignedTypes, regs,
                      vqaddUCode)

# Subtract and keep the high half of the wide difference.
vsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)

# As vsubhn, with a rounding constant added before the shift.
vrsubhnCode = '''
    destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
               (sizeof(Element) * 8);
'''
threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)

# Signed saturating add: overflow is detected when both operands share
# a sign that the sum does not; the result is then clamped and
# FPSCR.QC is set.
vqaddSCode = '''
    destElem = srcElem1 + srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool negSrc2 = (srcElem2 < 0);
    if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqadd", "VqaddS" + suffix, signedTypes, regs,
                      vqaddSCode)

# Unsigned saturating subtract: a wrapped difference is larger than the
# minuend and is clamped to zero.
vqsubUCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (destElem > srcElem1) {
        destElem = 0;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubU" + suffix, unsignedTypes, regs,
                      vqsubUCode)

# Signed saturating subtract, using the same sign-based overflow test
# as vqadd with the second operand's sign inverted.
vqsubSCode = '''
    destElem = srcElem1 - srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    bool negDest = (destElem < 0);
    bool negSrc1 = (srcElem1 < 0);
    bool posSrc2 = (srcElem2 >= 0);
    if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
        destElem = (Element)1 << (sizeof(Element) * 8 - 1);
        if (negDest)
            destElem -= 1;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vqsub", "VqsubS" + suffix, signedTypes, regs,
                      vqsubSCode)
1691
# Element-wise comparisons.  A true comparison yields an all-ones
# element, a false one yields zero.
vcgtCode = '''
    destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcgt", "Vcgt" + suffix, allTypes, regs, vcgtCode)

vcgeCode = '''
    destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vcge", "Vcge" + suffix, allTypes, regs, vcgeCode)

vceqCode = '''
    destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;
'''
for suffix, regs in (("D", 2), ("Q", 4)):
    threeEqualRegInst("vceq", "Vceq" + suffix, unsignedTypes, regs, vceqCode)
1709
# NOTE(review): the C++ snippets below call ltz(), which is not defined
# in this part of the file; presumably it is a "less than zero" helper
# visible to the generated code that is safe for unsigned element
# types -- confirm before relying on it.

# Shift by a signed per-element amount taken from the low byte of the
# second operand: negative amounts shift right, others shift left.
vshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    }
'''
threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)

# Rounding variant of vshl: on a right shift the last bit shifted out
# (rBit) is added back to the result.
vrshlCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (ltz(srcElem1) && !ltz(destElem)) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 << shiftAmt;
        }
    } else {
        destElem = srcElem1;
    }
'''
threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)

# Unsigned saturating shift: a left shift that would lose set bits
# clamps to all ones and sets FPSCR.QC.  Only instantiated for unsigned
# types, so the right shift needs no sign-extension fix-up.
vqshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
    } else if (shiftAmt > 0) {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)

# Signed saturating shift: a left shift saturates unless the bits
# shifted out, together with the new sign bit, all equal the original
# sign.  Saturation clamps to the most positive or most negative value
# and sets FPSCR.QC.
vqshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)

# Unsigned saturating rounding shift.  Only instantiated for unsigned
# types, so the negative-source rounding case and the sign-extension
# fix-up are not needed.
vqrshlUCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        destElem += rBit;
    } else {
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - shiftAmt)) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)

# Signed saturating rounding shift: combines the rounding right shift
# of vrshl with the saturating left shift of the signed vqshl.
vqrshlSCode = '''
    int16_t shiftAmt = (int8_t)srcElem2;
    FPSCR fpscr = (FPSCR)Fpscr;
    if (shiftAmt < 0) {
        shiftAmt = -shiftAmt;
        Element rBit = 0;
        if (shiftAmt <= sizeof(Element) * 8)
            rBit = bits(srcElem1, shiftAmt - 1);
        if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
            rBit = 1;
        if (shiftAmt >= sizeof(Element) * 8) {
            shiftAmt = sizeof(Element) * 8 - 1;
            destElem = 0;
        } else {
            destElem = (srcElem1 >> shiftAmt);
        }
        // Make sure the right shift sign extended when it should.
        if (srcElem1 < 0 && destElem >= 0) {
            destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                         1 - shiftAmt));
        }
        destElem += rBit;
    } else if (shiftAmt > 0) {
        bool sat = false;
        if (shiftAmt >= sizeof(Element) * 8) {
            if (srcElem1 != 0)
                sat = true;
            else
                destElem = 0;
        } else {
            if (bits(srcElem1, sizeof(Element) * 8 - 1,
                        sizeof(Element) * 8 - 1 - shiftAmt) !=
                    ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                sat = true;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
        if (sat) {
            fpscr.qc = 1;
            destElem = mask(sizeof(Element) * 8 - 1);
            if (srcElem1 < 0)
                destElem = ~destElem;
        }
    } else {
        destElem = srcElem1;
    }
    Fpscr = fpscr;
'''
threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)

# Absolute difference, accumulated into the destination.
vabaCode = '''
    destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                        (srcElem2 - srcElem1);
'''
threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
vabalCode = '''
    destElem += (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)

# Absolute difference.
vabdCode = '''
    destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                       (srcElem2 - srcElem1);
'''
threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
vabdlCode = '''
    destElem = (srcElem1 > srcElem2) ?
        ((BigElement)srcElem1 - (BigElement)srcElem2) :
        ((BigElement)srcElem2 - (BigElement)srcElem1);
'''
threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)

# Bit test: all ones when the operands share any set bit, else zero.
vtstCode = '''
    destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
'''
threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)

# Multiply and its lengthening form.
vmulCode = '''
    destElem = srcElem1 * srcElem2;
'''
threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
vmullCode = '''
    destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)

# Multiply-accumulate and its lengthening form.
vmlaCode = '''
    destElem = destElem + srcElem1 * srcElem2;
'''
threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
vmlalCode = '''
    destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
'''
threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)

# Saturating doubling multiply-accumulate long.  The doubled product
# (midElem) is clamped first, then the accumulate step is checked for
# overflow by comparing the signs before and after the add.
vqdmlalCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem += midElem;
    bool negDest = ltz(destElem);
    bool negMid = ltz(midElem);
    if (negPreDest == negMid && negMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)

# Saturating doubling multiply-subtract long; as vqdmlal, but the
# doubled product is subtracted and the overflow test uses the sign of
# its negation.
vqdmlslCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
    Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
    Element halfNeg = maxNeg / 2;
    if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
        (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
        (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
        midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
        fpscr.qc = 1;
    }
    bool negPreDest = ltz(destElem);
    destElem -= midElem;
    bool negDest = ltz(destElem);
    bool posMid = ltz((BigElement)-midElem);
    if (negPreDest == posMid && posMid != negDest) {
        destElem = mask(sizeof(BigElement) * 8 - 1);
        if (negPreDest)
            destElem = ~destElem;
        fpscr.qc = 1;
    }
    Fpscr = fpscr;
'''
threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2055
# vqdmull snippet: destElem = 2 * srcElem1 * srcElem2, computed in 64 bits.
# When both sources equal the value that has only the element's top bit set,
# destElem is instead set to ~(srcElem1 << element-width) and fpscr.qc is set.
# Registered through threeRegLongInst for smallTypes.
2056 vqdmullCode = '''
2057 FPSCR fpscr = (FPSCR)Fpscr;
2058 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2059 if (srcElem1 == srcElem2 &&
2060 srcElem1 == (Element)((Element)1 <<
2061 (Element)(sizeof(Element) * 8 - 1))) {
2062 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2063 fpscr.qc = 1;
2064 }
2065 Fpscr = fpscr;
2066 '''
2067 threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2068
# vmls snippet: multiply-subtract, destElem = destElem - srcElem1 * srcElem2.
# Registered twice through threeEqualRegInst for allTypes (NVmlsD with 2,
# NVmlsQ with 4).
2069 vmlsCode = '''
2070 destElem = destElem - srcElem1 * srcElem2;
2071 '''
2072 threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2073 threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
# vmlsl snippet: multiply-subtract where both sources are cast to BigElement
# before multiplying. Registered through threeRegLongInst for smallTypes.
2074 vmlslCode = '''
2075 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2076 '''
2077 threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2078
# Polynomial vmul snippet: starting from 0, XORs (srcElem1 << j) into destElem
# for every bit position j of srcElem2 for which bits(srcElem2, j) is nonzero,
# over sizeof(Element) * 8 positions (a carry-less multiply).
# Registered through threeEqualRegInst for unsignedTypes.
2079 vmulpCode = '''
2080 destElem = 0;
2081 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2082 if (bits(srcElem2, j))
2083 destElem ^= srcElem1 << j;
2084 }
2085 '''
2086 threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2087 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
# Polynomial vmull snippet: same XOR-of-shifted-copies loop as the polynomial
# vmul, but srcElem1 is cast to BigElement before shifting so the shifted
# terms are formed at the wider type.
# Registered through threeRegLongInst for smallUnsignedTypes.
2088 vmullpCode = '''
2089 destElem = 0;
2090 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2091 if (bits(srcElem2, j))
2092 destElem ^= (BigElement)srcElem1 << j;
2093 }
2094 '''
2095 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2096
# Pairwise max/min registrations: reuse vmaxCode and vminCode (bound outside
# the visible lines) with pairwise=True, for allTypes.
2097 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2098 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2099
2100 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2101 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2102
# vqdmulh snippet: destElem = (2 * srcElem1 * srcElem2) >> element-width, with
# the product formed in 64 bits. When both sources equal the value that has
# only the element's top bit set, destElem is set to ~srcElem1 and fpscr.qc
# is set. Registered through threeEqualRegInst for smallSignedTypes.
2103 vqdmulhCode = '''
2104 FPSCR fpscr = (FPSCR)Fpscr;
2105 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2106 (sizeof(Element) * 8);
2107 if (srcElem1 == srcElem2 &&
2108 srcElem1 == (Element)((Element)1 <<
2109 (sizeof(Element) * 8 - 1))) {
2110 destElem = ~srcElem1;
2111 fpscr.qc = 1;
2112 }
2113 Fpscr = fpscr;
2114 '''
2115 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2116 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2117
# vqrdmulh snippet: forms 2 * srcElem1 * srcElem2 in 64 bits, adds
# 1 << (element-bits - 1) as a rounding constant, then shifts right by the
# element width. For the maxNeg/halfNeg operand pairs tested below the result
# is replaced: mask(bits - 1) if the computed destElem is negative, otherwise
# 1 << (bits - 1); fpscr.qc is set in both cases.
# Registered through threeEqualRegInst for smallSignedTypes.
2118 vqrdmulhCode = '''
2119 FPSCR fpscr = (FPSCR)Fpscr;
2120 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2121 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2122 (sizeof(Element) * 8);
2123 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2124 Element halfNeg = maxNeg / 2;
2125 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2126 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2127 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2128 if (destElem < 0) {
2129 destElem = mask(sizeof(Element) * 8 - 1);
2130 } else {
2131 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2132 }
2133 fpscr.qc = 1;
2134 }
2135 Fpscr = fpscr;
2136 '''
2137 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2138 smallSignedTypes, 2, vqrdmulhCode)
2139 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2140 smallSignedTypes, 4, vqrdmulhCode)
2141
# Floating-point vmax snippet: processNans() supplies destReg and sets `done`.
# If not done, destReg is recomputed with binaryOp using fpMaxS; otherwise
# fpscr.idc is set when flushToZero(srcReg1, srcReg2) returns true.
# The local fpscr copy is written back to Fpscr at the end.
2142 vmaxfpCode = '''
2143 FPSCR fpscr = (FPSCR)Fpscr;
2144 bool done;
2145 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2146 if (!done) {
2147 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2148 true, true, VfpRoundNearest);
2149 } else if (flushToZero(srcReg1, srcReg2)) {
2150 fpscr.idc = 1;
2151 }
2152 Fpscr = fpscr;
2153 '''
2154 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2155 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2156
# Floating-point vmin snippet: processNans() supplies destReg and sets `done`.
# If not done, destReg is recomputed with binaryOp using fpMinS; otherwise
# fpscr.idc is set when flushToZero(srcReg1, srcReg2) returns true.
# The local fpscr copy is written back to Fpscr at the end.
2157 vminfpCode = '''
2158 FPSCR fpscr = (FPSCR)Fpscr;
2159 bool done;
2160 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2161 if (!done) {
2162 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2163 true, true, VfpRoundNearest);
2164 } else if (flushToZero(srcReg1, srcReg2)) {
2165 fpscr.idc = 1;
2166 }
2167 Fpscr = fpscr;
2168 '''
2169 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2170 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2171
# Floating-point pairwise max/min registrations: reuse vmaxfpCode and
# vminfpCode with pairwise=True.
2172 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2173 2, vmaxfpCode, pairwise=True)
2174 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2175 4, vmaxfpCode, pairwise=True)
2176
2177 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2178 2, vminfpCode, pairwise=True)
2179 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2180 4, vminfpCode, pairwise=True)
2181
# Floating-point add snippet: destReg = binaryOp(srcReg1, srcReg2) with fpAddS
# and VfpRoundNearest; fpscr is copied from Fpscr, passed to binaryOp, and
# written back. Registered for vadd and, with pairwise=True, for vpadd.
2182 vaddfpCode = '''
2183 FPSCR fpscr = Fpscr;
2184 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2185 true, true, VfpRoundNearest);
2186 Fpscr = fpscr;
2187 '''
2188 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2189 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2190
2191 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2192 2, vaddfpCode, pairwise=True)
2193 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2194 4, vaddfpCode, pairwise=True)
2195
# Floating-point subtract snippet: destReg = binaryOp(srcReg1, srcReg2) with
# fpSubS and VfpRoundNearest; fpscr is read from and written back to Fpscr.
2196 vsubfpCode = '''
2197 FPSCR fpscr = Fpscr;
2198 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2199 true, true, VfpRoundNearest);
2200 Fpscr = fpscr;
2201 '''
2202 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2203 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2204
# Floating-point multiply snippet: destReg = binaryOp(srcReg1, srcReg2) with
# fpMulS and VfpRoundNearest; fpscr is read from and written back to Fpscr.
2205 vmulfpCode = '''
2206 FPSCR fpscr = Fpscr;
2207 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2208 true, true, VfpRoundNearest);
2209 Fpscr = fpscr;
2210 '''
2211 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2212 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2213
# Floating-point multiply-accumulate snippet, done as two binaryOp calls:
# mid = srcReg1 (fpMulS) srcReg2, then destReg = mid (fpAddS) destReg.
# The same fpscr copy is threaded through both calls and written back.
2214 vmlafpCode = '''
2215 FPSCR fpscr = Fpscr;
2216 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2217 true, true, VfpRoundNearest);
2218 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2219 true, true, VfpRoundNearest);
2220 Fpscr = fpscr;
2221 '''
2222 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2223 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2224
# Floating-point multiply-subtract snippet, done as two binaryOp calls:
# mid = srcReg1 (fpMulS) srcReg2, then destReg = destReg (fpSubS) mid.
# The same fpscr copy is threaded through both calls and written back.
2225 vmlsfpCode = '''
2226 FPSCR fpscr = Fpscr;
2227 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2228 true, true, VfpRoundNearest);
2229 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2230 true, true, VfpRoundNearest);
2231 Fpscr = fpscr;
2232 '''
2233 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2234 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2235
# Floating-point vcgt snippet: res comes from binaryOp with vcgtFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vcgtFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2236 vcgtfpCode = '''
2237 FPSCR fpscr = (FPSCR)Fpscr;
2238 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2239 true, true, VfpRoundNearest);
2240 destReg = (res == 0) ? -1 : 0;
2241 if (res == 2.0)
2242 fpscr.ioc = 1;
2243 Fpscr = fpscr;
2244 '''
2245 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2246 2, vcgtfpCode, toInt = True)
2247 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2248 4, vcgtfpCode, toInt = True)
2249
# Floating-point vcge snippet: res comes from binaryOp with vcgeFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vcgeFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2250 vcgefpCode = '''
2251 FPSCR fpscr = (FPSCR)Fpscr;
2252 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2253 true, true, VfpRoundNearest);
2254 destReg = (res == 0) ? -1 : 0;
2255 if (res == 2.0)
2256 fpscr.ioc = 1;
2257 Fpscr = fpscr;
2258 '''
2259 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2260 2, vcgefpCode, toInt = True)
2261 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2262 4, vcgefpCode, toInt = True)
2263
# Floating-point vacgt snippet: res comes from binaryOp with vacgtFunc;
# destReg is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vacgtFunc,
# which is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2264 vacgtfpCode = '''
2265 FPSCR fpscr = (FPSCR)Fpscr;
2266 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2267 true, true, VfpRoundNearest);
2268 destReg = (res == 0) ? -1 : 0;
2269 if (res == 2.0)
2270 fpscr.ioc = 1;
2271 Fpscr = fpscr;
2272 '''
2273 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2274 2, vacgtfpCode, toInt = True)
2275 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2276 4, vacgtfpCode, toInt = True)
2277
# Floating-point vacge snippet: res comes from binaryOp with vacgeFunc;
# destReg is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vacgeFunc,
# which is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2278 vacgefpCode = '''
2279 FPSCR fpscr = (FPSCR)Fpscr;
2280 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2281 true, true, VfpRoundNearest);
2282 destReg = (res == 0) ? -1 : 0;
2283 if (res == 2.0)
2284 fpscr.ioc = 1;
2285 Fpscr = fpscr;
2286 '''
2287 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2288 2, vacgefpCode, toInt = True)
2289 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2290 4, vacgefpCode, toInt = True)
2291
# Floating-point vceq snippet: res comes from binaryOp with vceqFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vceqFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2292 vceqfpCode = '''
2293 FPSCR fpscr = (FPSCR)Fpscr;
2294 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2295 true, true, VfpRoundNearest);
2296 destReg = (res == 0) ? -1 : 0;
2297 if (res == 2.0)
2298 fpscr.ioc = 1;
2299 Fpscr = fpscr;
2300 '''
2301 threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2302 2, vceqfpCode, toInt = True)
2303 threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2304 4, vceqfpCode, toInt = True)
2305
# vrecps snippet: destReg = binaryOp(srcReg1, srcReg2) using fpRecpsS with
# VfpRoundNearest; fpscr is read from and written back to Fpscr.
2306 vrecpsCode = '''
2307 FPSCR fpscr = Fpscr;
2308 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2309 true, true, VfpRoundNearest);
2310 Fpscr = fpscr;
2311 '''
2312 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2313 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2314
# vrsqrts snippet: destReg = binaryOp(srcReg1, srcReg2) using fpRSqrtsS with
# VfpRoundNearest; fpscr is read from and written back to Fpscr.
2315 vrsqrtsCode = '''
2316 FPSCR fpscr = Fpscr;
2317 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2318 true, true, VfpRoundNearest);
2319 Fpscr = fpscr;
2320 '''
2321 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2322 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2323
# Floating-point absolute-difference snippet: mid = srcReg1 (fpSubS) srcReg2
# via binaryOp, then destReg = fabs(mid). fpscr is written back to Fpscr.
2324 vabdfpCode = '''
2325 FPSCR fpscr = Fpscr;
2326 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2327 true, true, VfpRoundNearest);
2328 destReg = fabs(mid);
2329 Fpscr = fpscr;
2330 '''
2331 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2332 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2333
# Second set of registrations for the multiply family: the same snippet
# strings are passed to the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst helpers under class names with an "s" infix or suffix.
# vmlaCode, vmlalCode, vmulCode and vmullCode are bound outside the visible
# lines.
# NOTE(review): vmla is registered for unsignedTypes while vmls and vmul use
# allTypes -- confirm this asymmetry is intended.
2334 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2335 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2336 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2337 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2338 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2339
2340 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2341 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2342 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2343 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2344 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2345
2346 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2347 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2348 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2349 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2350 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2351
2352 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2353 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2354 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2355 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2356 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2357 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2358 smallSignedTypes, 2, vqrdmulhCode)
2359 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2360 smallSignedTypes, 4, vqrdmulhCode)
2361
# Snippet text bound to vshrCode.
# NOTE(review): after the opening `if (imm >= sizeof(srcElem1) * 8) {` the
# string continues with an overflow check on negPreDest/posMid/negDest and an
# Fpscr write-back, none of which are declared in this string, and the
# instantiation that follows registers vqdmlsl with vqdmlslCode -- this looks
# like the tail of a different snippet spliced in; confirm against the real
# file.
2362 vshrCode = '''
2363 if (imm >= sizeof(srcElem1) * 8) {
2034 if (negPreDest == posMid && posMid != negDest) {
2035 destElem = mask(sizeof(BigElement) * 8 - 1);
2036 if (negPreDest)
2037 destElem = ~destElem;
2038 fpscr.qc = 1;
2039 }
2040 Fpscr = fpscr;
2041 '''
2042 threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2043
# vqdmull snippet: destElem = 2 * srcElem1 * srcElem2, computed in 64 bits.
# When both sources equal the value that has only the element's top bit set,
# destElem is instead set to ~(srcElem1 << element-width) and fpscr.qc is set.
# Registered through threeRegLongInst for smallTypes.
2044 vqdmullCode = '''
2045 FPSCR fpscr = (FPSCR)Fpscr;
2046 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2047 if (srcElem1 == srcElem2 &&
2048 srcElem1 == (Element)((Element)1 <<
2049 (Element)(sizeof(Element) * 8 - 1))) {
2050 destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2051 fpscr.qc = 1;
2052 }
2053 Fpscr = fpscr;
2054 '''
2055 threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2056
# vmls snippet: multiply-subtract, destElem = destElem - srcElem1 * srcElem2.
# Registered twice through threeEqualRegInst for allTypes (NVmlsD with 2,
# NVmlsQ with 4).
2057 vmlsCode = '''
2058 destElem = destElem - srcElem1 * srcElem2;
2059 '''
2060 threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2061 threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
# vmlsl snippet: multiply-subtract where both sources are cast to BigElement
# before multiplying. Registered through threeRegLongInst for smallTypes.
2062 vmlslCode = '''
2063 destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2064 '''
2065 threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2066
# Polynomial vmul snippet: starting from 0, XORs (srcElem1 << j) into destElem
# for every bit position j of srcElem2 for which bits(srcElem2, j) is nonzero,
# over sizeof(Element) * 8 positions (a carry-less multiply).
# Registered through threeEqualRegInst for unsignedTypes.
2067 vmulpCode = '''
2068 destElem = 0;
2069 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2070 if (bits(srcElem2, j))
2071 destElem ^= srcElem1 << j;
2072 }
2073 '''
2074 threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2075 threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
# Polynomial vmull snippet: same XOR-of-shifted-copies loop as the polynomial
# vmul, but srcElem1 is cast to BigElement before shifting so the shifted
# terms are formed at the wider type.
# Registered through threeRegLongInst for smallUnsignedTypes.
2076 vmullpCode = '''
2077 destElem = 0;
2078 for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2079 if (bits(srcElem2, j))
2080 destElem ^= (BigElement)srcElem1 << j;
2081 }
2082 '''
2083 threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2084
# Pairwise max/min registrations: reuse vmaxCode and vminCode (bound outside
# the visible lines) with pairwise=True, for allTypes.
2085 threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2086 threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2087
2088 threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2089 threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2090
# vqdmulh snippet: destElem = (2 * srcElem1 * srcElem2) >> element-width, with
# the product formed in 64 bits. When both sources equal the value that has
# only the element's top bit set, destElem is set to ~srcElem1 and fpscr.qc
# is set. Registered through threeEqualRegInst for smallSignedTypes.
2091 vqdmulhCode = '''
2092 FPSCR fpscr = (FPSCR)Fpscr;
2093 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2094 (sizeof(Element) * 8);
2095 if (srcElem1 == srcElem2 &&
2096 srcElem1 == (Element)((Element)1 <<
2097 (sizeof(Element) * 8 - 1))) {
2098 destElem = ~srcElem1;
2099 fpscr.qc = 1;
2100 }
2101 Fpscr = fpscr;
2102 '''
2103 threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2104 threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2105
# vqrdmulh snippet: forms 2 * srcElem1 * srcElem2 in 64 bits, adds
# 1 << (element-bits - 1) as a rounding constant, then shifts right by the
# element width. For the maxNeg/halfNeg operand pairs tested below the result
# is replaced: mask(bits - 1) if the computed destElem is negative, otherwise
# 1 << (bits - 1); fpscr.qc is set in both cases.
# Registered through threeEqualRegInst for smallSignedTypes.
2106 vqrdmulhCode = '''
2107 FPSCR fpscr = (FPSCR)Fpscr;
2108 destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2109 ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2110 (sizeof(Element) * 8);
2111 Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2112 Element halfNeg = maxNeg / 2;
2113 if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2114 (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2115 (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2116 if (destElem < 0) {
2117 destElem = mask(sizeof(Element) * 8 - 1);
2118 } else {
2119 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2120 }
2121 fpscr.qc = 1;
2122 }
2123 Fpscr = fpscr;
2124 '''
2125 threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2126 smallSignedTypes, 2, vqrdmulhCode)
2127 threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2128 smallSignedTypes, 4, vqrdmulhCode)
2129
# Floating-point vmax snippet: processNans() supplies destReg and sets `done`.
# If not done, destReg is recomputed with binaryOp using fpMaxS; otherwise
# fpscr.idc is set when flushToZero(srcReg1, srcReg2) returns true.
# The local fpscr copy is written back to Fpscr at the end.
2130 vmaxfpCode = '''
2131 FPSCR fpscr = (FPSCR)Fpscr;
2132 bool done;
2133 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2134 if (!done) {
2135 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2136 true, true, VfpRoundNearest);
2137 } else if (flushToZero(srcReg1, srcReg2)) {
2138 fpscr.idc = 1;
2139 }
2140 Fpscr = fpscr;
2141 '''
2142 threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2143 threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2144
# Floating-point vmin snippet: processNans() supplies destReg and sets `done`.
# If not done, destReg is recomputed with binaryOp using fpMinS; otherwise
# fpscr.idc is set when flushToZero(srcReg1, srcReg2) returns true.
# The local fpscr copy is written back to Fpscr at the end.
2145 vminfpCode = '''
2146 FPSCR fpscr = (FPSCR)Fpscr;
2147 bool done;
2148 destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2149 if (!done) {
2150 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2151 true, true, VfpRoundNearest);
2152 } else if (flushToZero(srcReg1, srcReg2)) {
2153 fpscr.idc = 1;
2154 }
2155 Fpscr = fpscr;
2156 '''
2157 threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2158 threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2159
# Floating-point pairwise max/min registrations: reuse vmaxfpCode and
# vminfpCode with pairwise=True.
2160 threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2161 2, vmaxfpCode, pairwise=True)
2162 threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2163 4, vmaxfpCode, pairwise=True)
2164
2165 threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2166 2, vminfpCode, pairwise=True)
2167 threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2168 4, vminfpCode, pairwise=True)
2169
# Floating-point add snippet: destReg = binaryOp(srcReg1, srcReg2) with fpAddS
# and VfpRoundNearest; fpscr is copied from Fpscr, passed to binaryOp, and
# written back. Registered for vadd and, with pairwise=True, for vpadd.
2170 vaddfpCode = '''
2171 FPSCR fpscr = Fpscr;
2172 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2173 true, true, VfpRoundNearest);
2174 Fpscr = fpscr;
2175 '''
2176 threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2177 threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2178
2179 threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2180 2, vaddfpCode, pairwise=True)
2181 threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2182 4, vaddfpCode, pairwise=True)
2183
# Floating-point subtract snippet: destReg = binaryOp(srcReg1, srcReg2) with
# fpSubS and VfpRoundNearest; fpscr is read from and written back to Fpscr.
2184 vsubfpCode = '''
2185 FPSCR fpscr = Fpscr;
2186 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2187 true, true, VfpRoundNearest);
2188 Fpscr = fpscr;
2189 '''
2190 threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2191 threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2192
# Floating-point multiply snippet: destReg = binaryOp(srcReg1, srcReg2) with
# fpMulS and VfpRoundNearest; fpscr is read from and written back to Fpscr.
2193 vmulfpCode = '''
2194 FPSCR fpscr = Fpscr;
2195 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2196 true, true, VfpRoundNearest);
2197 Fpscr = fpscr;
2198 '''
2199 threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2200 threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2201
# Floating-point multiply-accumulate snippet, done as two binaryOp calls:
# mid = srcReg1 (fpMulS) srcReg2, then destReg = mid (fpAddS) destReg.
# The same fpscr copy is threaded through both calls and written back.
2202 vmlafpCode = '''
2203 FPSCR fpscr = Fpscr;
2204 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2205 true, true, VfpRoundNearest);
2206 destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2207 true, true, VfpRoundNearest);
2208 Fpscr = fpscr;
2209 '''
2210 threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2211 threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2212
# Floating-point multiply-subtract snippet, done as two binaryOp calls:
# mid = srcReg1 (fpMulS) srcReg2, then destReg = destReg (fpSubS) mid.
# The same fpscr copy is threaded through both calls and written back.
2213 vmlsfpCode = '''
2214 FPSCR fpscr = Fpscr;
2215 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2216 true, true, VfpRoundNearest);
2217 destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2218 true, true, VfpRoundNearest);
2219 Fpscr = fpscr;
2220 '''
2221 threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2222 threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2223
# Floating-point vcgt snippet: res comes from binaryOp with vcgtFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vcgtFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2224 vcgtfpCode = '''
2225 FPSCR fpscr = (FPSCR)Fpscr;
2226 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2227 true, true, VfpRoundNearest);
2228 destReg = (res == 0) ? -1 : 0;
2229 if (res == 2.0)
2230 fpscr.ioc = 1;
2231 Fpscr = fpscr;
2232 '''
2233 threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2234 2, vcgtfpCode, toInt = True)
2235 threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2236 4, vcgtfpCode, toInt = True)
2237
# Floating-point vcge snippet: res comes from binaryOp with vcgeFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vcgeFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2238 vcgefpCode = '''
2239 FPSCR fpscr = (FPSCR)Fpscr;
2240 float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2241 true, true, VfpRoundNearest);
2242 destReg = (res == 0) ? -1 : 0;
2243 if (res == 2.0)
2244 fpscr.ioc = 1;
2245 Fpscr = fpscr;
2246 '''
2247 threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2248 2, vcgefpCode, toInt = True)
2249 threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2250 4, vcgefpCode, toInt = True)
2251
# Floating-point vacgt snippet: res comes from binaryOp with vacgtFunc;
# destReg is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vacgtFunc,
# which is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2252 vacgtfpCode = '''
2253 FPSCR fpscr = (FPSCR)Fpscr;
2254 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2255 true, true, VfpRoundNearest);
2256 destReg = (res == 0) ? -1 : 0;
2257 if (res == 2.0)
2258 fpscr.ioc = 1;
2259 Fpscr = fpscr;
2260 '''
2261 threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2262 2, vacgtfpCode, toInt = True)
2263 threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2264 4, vacgtfpCode, toInt = True)
2265
# Floating-point vacge snippet: res comes from binaryOp with vacgeFunc;
# destReg is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vacgeFunc,
# which is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2266 vacgefpCode = '''
2267 FPSCR fpscr = (FPSCR)Fpscr;
2268 float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2269 true, true, VfpRoundNearest);
2270 destReg = (res == 0) ? -1 : 0;
2271 if (res == 2.0)
2272 fpscr.ioc = 1;
2273 Fpscr = fpscr;
2274 '''
2275 threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2276 2, vacgefpCode, toInt = True)
2277 threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2278 4, vacgefpCode, toInt = True)
2279
# Floating-point vceq snippet: res comes from binaryOp with vceqFunc; destReg
# is -1 when res == 0 and 0 otherwise, and res == 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 codes is defined by vceqFunc, which
# is not visible here -- confirm before changing the comparisons.
# Registered with toInt = True.
2280 vceqfpCode = '''
2281 FPSCR fpscr = (FPSCR)Fpscr;
2282 float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2283 true, true, VfpRoundNearest);
2284 destReg = (res == 0) ? -1 : 0;
2285 if (res == 2.0)
2286 fpscr.ioc = 1;
2287 Fpscr = fpscr;
2288 '''
2289 threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2290 2, vceqfpCode, toInt = True)
2291 threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2292 4, vceqfpCode, toInt = True)
2293
# vrecps snippet: destReg = binaryOp(srcReg1, srcReg2) using fpRecpsS with
# VfpRoundNearest; fpscr is read from and written back to Fpscr.
2294 vrecpsCode = '''
2295 FPSCR fpscr = Fpscr;
2296 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2297 true, true, VfpRoundNearest);
2298 Fpscr = fpscr;
2299 '''
2300 threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2301 threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2302
# vrsqrts snippet: destReg = binaryOp(srcReg1, srcReg2) using fpRSqrtsS with
# VfpRoundNearest; fpscr is read from and written back to Fpscr.
2303 vrsqrtsCode = '''
2304 FPSCR fpscr = Fpscr;
2305 destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2306 true, true, VfpRoundNearest);
2307 Fpscr = fpscr;
2308 '''
2309 threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2310 threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2311
# Floating-point absolute-difference snippet: mid = srcReg1 (fpSubS) srcReg2
# via binaryOp, then destReg = fabs(mid). fpscr is written back to Fpscr.
2312 vabdfpCode = '''
2313 FPSCR fpscr = Fpscr;
2314 float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2315 true, true, VfpRoundNearest);
2316 destReg = fabs(mid);
2317 Fpscr = fpscr;
2318 '''
2319 threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2320 threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2321
# Second set of registrations for the multiply family: the same snippet
# strings are passed to the twoEqualRegInst / twoEqualRegInstFp /
# twoRegLongInst helpers under class names with an "s" infix or suffix.
# vmlaCode, vmlalCode, vmulCode and vmullCode are bound outside the visible
# lines.
# NOTE(review): vmla is registered for unsignedTypes while vmls and vmul use
# allTypes -- confirm this asymmetry is intended.
2322 twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2323 twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2324 twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2325 twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2326 twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2327
2328 twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2329 twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2330 twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2331 twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2332 twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2333
2334 twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2335 twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2336 twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2337 twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2338 twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2339
2340 twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2341 twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2342 twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2343 twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2344 twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2345 twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2346 smallSignedTypes, 2, vqrdmulhCode)
2347 twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2348 smallSignedTypes, 4, vqrdmulhCode)
2349
# vshr snippet (shift right by immediate): when imm is at least the element
# bit-width the result is meant to be -1 for a negative source and 0
# otherwise; for smaller imm it is srcElem1 >> imm.
# NOTE(review): two sign tests appear back to back (`srcElem1 < 0` and
# `ltz(srcElem1)`). As written the `else` binds to the inner `if`, so
# destElem is never assigned for a non-negative source when imm is out of
# range -- looks like two revisions of the same line merged; keep only one.
2350 vshrCode = '''
2351 if (imm >= sizeof(srcElem1) * 8) {
2364 if (srcElem1 < 0)
2352 if (ltz(srcElem1))
2365 destElem = -1;
2366 else
2367 destElem = 0;
2368 } else {
2369 destElem = srcElem1 >> imm;
2370 }
2371 '''
2372 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2373 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2374
# Snippet text bound to vsraCode.
# NOTE(review): after `Element mid;;` (stray second semicolon) and the opening
# range check, the body assigns destElem rather than mid, has an `else` with
# no matching inner `if`, and is followed by the vshr registrations -- this
# looks like the tail of the vshr snippet spliced in; confirm against the
# real file.
2375 vsraCode = '''
2376 Element mid;;
2377 if (imm >= sizeof(srcElem1) * 8) {
2353 destElem = -1;
2354 else
2355 destElem = 0;
2356 } else {
2357 destElem = srcElem1 >> imm;
2358 }
2359 '''
2360 twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2361 twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2362
# vsra snippet (shift right by immediate and accumulate): mid is -1 or 0 by
# sign of srcElem1 when imm is at least the element bit-width, otherwise
# srcElem1 >> imm with the sign bits ORed back in if the shift produced a
# non-negative value from a negative source; destElem += mid.
# NOTE(review): most statements appear twice (a `< 0` / `>= 0` form and an
# ltz() form), including `} else {` and the inner `if`, leaving the braces
# unbalanced -- looks like two revisions merged; keep only one of each pair.
# `Element mid;;` also carries a stray second semicolon.
2363 vsraCode = '''
2364 Element mid;;
2365 if (imm >= sizeof(srcElem1) * 8) {
2378 mid = (srcElem1 < 0) ? -1 : 0;
2366 mid = ltz(srcElem1) ? -1 : 0;
2379 } else {
2380 mid = srcElem1 >> imm;
2367 } else {
2368 mid = srcElem1 >> imm;
2381 if (srcElem1 < 0 && mid >= 0) {
2369 if (ltz(srcElem1) && !ltz(mid)) {
2382 mid |= -(mid & ((Element)1 <<
2383 (sizeof(Element) * 8 - 1 - imm)));
2384 }
2385 }
2386 destElem += mid;
2387 '''
2388 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2389 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2390
# vrshr snippet (rounding shift right by immediate): imm greater than the
# element bit-width gives 0; a nonzero imm gives (srcElem1 >> imm) + rBit,
# where rBit is bit (imm - 1) of srcElem1 and the shift is done in two steps
# (imm - 1, then 1), presumably so imm equal to the bit-width never shifts by
# the full width at once; imm == 0 copies the source.
2391 vrshrCode = '''
2392 if (imm > sizeof(srcElem1) * 8) {
2393 destElem = 0;
2394 } else if (imm) {
2395 Element rBit = bits(srcElem1, imm - 1);
2396 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2397 } else {
2398 destElem = srcElem1;
2399 }
2400 '''
2401 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2402 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2403
# vrsra snippet (rounding shift right and accumulate): same three cases as the
# rounding shift, but the value is added to destElem instead of replacing it.
# `destElem += 0` in the out-of-range case is a deliberate no-op.
2404 vrsraCode = '''
2405 if (imm > sizeof(srcElem1) * 8) {
2406 destElem += 0;
2407 } else if (imm) {
2408 Element rBit = bits(srcElem1, imm - 1);
2409 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2410 } else {
2411 destElem += srcElem1;
2412 }
2413 '''
2414 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2415 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2416
# vsri snippet (shift right and insert): for imm below the element bit-width,
# destElem = (srcElem1 >> imm) | (destElem & ~mask(bits - imm)); assuming
# mask(n) is the low-n-bits mask, that keeps the top imm bits of destElem.
# For larger imm destElem is left unchanged (the self-assignment is a no-op).
2417 vsriCode = '''
2418 if (imm >= sizeof(Element) * 8)
2419 destElem = destElem;
2420 else
2421 destElem = (srcElem1 >> imm) |
2422 (destElem & ~mask(sizeof(Element) * 8 - imm));
2423 '''
2424 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2425 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2426
# vshl snippet (shift left by immediate): srcElem1 << imm for imm below the
# element bit-width; for larger imm the shift is split into (bits - 1) and 1
# so no single shift uses the full width.
2427 vshlCode = '''
2428 if (imm >= sizeof(Element) * 8)
2429 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2430 else
2431 destElem = srcElem1 << imm;
2432 '''
2433 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2434 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2435
# vsli snippet (shift left and insert): for imm below the element bit-width,
# destElem = (srcElem1 << imm) | (destElem & mask(imm)); assuming mask(n) is
# the low-n-bits mask, that keeps the low imm bits of destElem.
# For larger imm destElem is left unchanged (the self-assignment is a no-op).
2436 vsliCode = '''
2437 if (imm >= sizeof(Element) * 8)
2438 destElem = destElem;
2439 else
2440 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2441 '''
2442 twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2443 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2444
# vqshl snippet for signedTypes (saturating shift left by immediate).
# - imm >= element bits: a nonzero source saturates to 1 << (bits - 1),
#   complemented when the source is positive, and sets fpscr.qc; zero gives 0.
# - 0 < imm < element bits: shifts, then inspects topBits, the source bits
#   from (bits - 1) down to (bits - 1 - imm); unless they are all zero or
#   equal mask(imm + 1) the result saturates the same way and sets fpscr.qc.
# - imm == 0: copies the source.
2445 vqshlCode = '''
2446 FPSCR fpscr = (FPSCR)Fpscr;
2447 if (imm >= sizeof(Element) * 8) {
2448 if (srcElem1 != 0) {
2449 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2450 if (srcElem1 > 0)
2451 destElem = ~destElem;
2452 fpscr.qc = 1;
2453 } else {
2454 destElem = 0;
2455 }
2456 } else if (imm) {
2457 destElem = (srcElem1 << imm);
2458 uint64_t topBits = bits((uint64_t)srcElem1,
2459 sizeof(Element) * 8 - 1,
2460 sizeof(Element) * 8 - 1 - imm);
2461 if (topBits != 0 && topBits != mask(imm + 1)) {
2462 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2463 if (srcElem1 > 0)
2464 destElem = ~destElem;
2465 fpscr.qc = 1;
2466 }
2467 } else {
2468 destElem = srcElem1;
2469 }
2470 Fpscr = fpscr;
2471 '''
2472 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2473 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2474
# vqshlu snippet for unsignedTypes (saturating shift left by immediate).
# - imm >= element bits: a nonzero source saturates to mask(bits) and sets
#   fpscr.qc; zero gives 0.
# - 0 < imm < element bits: shifts, then saturates to mask(bits) with
#   fpscr.qc set if any of the source bits from (bits - 1) down to
#   (bits - imm) is nonzero.
# - imm == 0: copies the source.
2475 vqshluCode = '''
2476 FPSCR fpscr = (FPSCR)Fpscr;
2477 if (imm >= sizeof(Element) * 8) {
2478 if (srcElem1 != 0) {
2479 destElem = mask(sizeof(Element) * 8);
2480 fpscr.qc = 1;
2481 } else {
2482 destElem = 0;
2483 }
2484 } else if (imm) {
2485 destElem = (srcElem1 << imm);
2486 uint64_t topBits = bits((uint64_t)srcElem1,
2487 sizeof(Element) * 8 - 1,
2488 sizeof(Element) * 8 - imm);
2489 if (topBits != 0) {
2490 destElem = mask(sizeof(Element) * 8);
2491 fpscr.qc = 1;
2492 }
2493 } else {
2494 destElem = srcElem1;
2495 }
2496 Fpscr = fpscr;
2497 '''
2498 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2499 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2500
# vqshlus snippet for signedTypes: shift left by immediate with the result
# clamped to the range 0 .. mask(bits). In every imm case a negative source
# yields 0 with fpscr.qc set; a positive source that overflows (imm >= bits,
# or nonzero source bits from (bits - 1) down to (bits - imm)) yields
# mask(bits) with fpscr.qc set; otherwise the shifted (or, for imm == 0,
# unmodified) source is used.
2501 vqshlusCode = '''
2502 FPSCR fpscr = (FPSCR)Fpscr;
2503 if (imm >= sizeof(Element) * 8) {
2504 if (srcElem1 < 0) {
2505 destElem = 0;
2506 fpscr.qc = 1;
2507 } else if (srcElem1 > 0) {
2508 destElem = mask(sizeof(Element) * 8);
2509 fpscr.qc = 1;
2510 } else {
2511 destElem = 0;
2512 }
2513 } else if (imm) {
2514 destElem = (srcElem1 << imm);
2515 uint64_t topBits = bits((uint64_t)srcElem1,
2516 sizeof(Element) * 8 - 1,
2517 sizeof(Element) * 8 - imm);
2518 if (srcElem1 < 0) {
2519 destElem = 0;
2520 fpscr.qc = 1;
2521 } else if (topBits != 0) {
2522 destElem = mask(sizeof(Element) * 8);
2523 fpscr.qc = 1;
2524 }
2525 } else {
2526 if (srcElem1 < 0) {
2527 fpscr.qc = 1;
2528 destElem = 0;
2529 } else {
2530 destElem = srcElem1;
2531 }
2532 }
2533 Fpscr = fpscr;
2534 '''
2535 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2536 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2537
# vshrn snippet (shift right by immediate, narrowing helper): 0 when imm is at
# least the bit-width of srcElem1, otherwise srcElem1 >> imm.
# Registered through twoRegNarrowShiftInst for smallUnsignedTypes.
2538 vshrnCode = '''
2539 if (imm >= sizeof(srcElem1) * 8) {
2540 destElem = 0;
2541 } else {
2542 destElem = srcElem1 >> imm;
2543 }
2544 '''
2545 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2546
# vrshrn snippet (rounding shift right, narrowing helper): 0 when imm exceeds
# the bit-width of srcElem1; for nonzero imm, (srcElem1 >> imm) + rBit with
# rBit = bit (imm - 1) of srcElem1 and the shift done in two steps; imm == 0
# copies the source. Registered for smallUnsignedTypes.
2547 vrshrnCode = '''
2548 if (imm > sizeof(srcElem1) * 8) {
2549 destElem = 0;
2550 } else if (imm) {
2551 Element rBit = bits(srcElem1, imm - 1);
2552 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2553 } else {
2554 destElem = srcElem1;
2555 }
2556 '''
2557 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2558
# vqshrn snippet for smallSignedTypes (saturating shift right, narrowing).
# - imm > bit-width of srcElem1: result 0; fpscr.qc is set unless the source
#   is 0 or -1.
# - nonzero imm: mid = srcElem1 >> imm (two-step shift), then the bit at
#   position (BigElement bits - 1 - imm) is propagated upward through
#   `mid |= -(mid & bit)`. If mid does not survive a round trip through
#   Element, the result saturates to mask(Element bits - 1), complemented for
#   a negative source, and fpscr.qc is set.
# - imm == 0: copies the source.
# NOTE(review): the imm == 0 path has no range check, unlike the matching
# path in vqrshrnCode -- confirm whether imm == 0 can reach this snippet.
2559 vqshrnCode = '''
2560 FPSCR fpscr = (FPSCR)Fpscr;
2561 if (imm > sizeof(srcElem1) * 8) {
2562 if (srcElem1 != 0 && srcElem1 != -1)
2563 fpscr.qc = 1;
2564 destElem = 0;
2565 } else if (imm) {
2566 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2567 mid |= -(mid & ((BigElement)1 <<
2568 (sizeof(BigElement) * 8 - 1 - imm)));
2569 if (mid != (Element)mid) {
2570 destElem = mask(sizeof(Element) * 8 - 1);
2571 if (srcElem1 < 0)
2572 destElem = ~destElem;
2573 fpscr.qc = 1;
2574 } else {
2575 destElem = mid;
2576 }
2577 } else {
2578 destElem = srcElem1;
2579 }
2580 Fpscr = fpscr;
2581 '''
2582 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2583
# vqshrun snippet registered for smallUnsignedTypes (saturating shift right,
# narrowing).
# - imm > bit-width of srcElem1: result 0; fpscr.qc set for a nonzero source.
# - nonzero imm: mid = srcElem1 >> imm (two-step shift); if mid does not
#   survive a round trip through Element the result is mask(Element bits)
#   with fpscr.qc set, otherwise mid.
# - imm == 0: copies the source.
2584 vqshrunCode = '''
2585 FPSCR fpscr = (FPSCR)Fpscr;
2586 if (imm > sizeof(srcElem1) * 8) {
2587 if (srcElem1 != 0)
2588 fpscr.qc = 1;
2589 destElem = 0;
2590 } else if (imm) {
2591 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2592 if (mid != (Element)mid) {
2593 destElem = mask(sizeof(Element) * 8);
2594 fpscr.qc = 1;
2595 } else {
2596 destElem = mid;
2597 }
2598 } else {
2599 destElem = srcElem1;
2600 }
2601 Fpscr = fpscr;
2602 '''
2603 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2604 smallUnsignedTypes, vqshrunCode)
2605
# vqshrun snippet registered for smallSignedTypes (class NVqshruns).
# - imm > bit-width of srcElem1: result 0; fpscr.qc set for a nonzero source.
# - nonzero imm: mid = srcElem1 >> imm (two-step shift); if any bit of mid
#   from (BigElement bits - 1) down to (Element bits) is set, the result is
#   0 for a negative source or mask(Element bits) otherwise, with fpscr.qc
#   set; else the result is mid.
# - imm == 0: copies the source.
2606 vqshrunsCode = '''
2607 FPSCR fpscr = (FPSCR)Fpscr;
2608 if (imm > sizeof(srcElem1) * 8) {
2609 if (srcElem1 != 0)
2610 fpscr.qc = 1;
2611 destElem = 0;
2612 } else if (imm) {
2613 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2614 if (bits(mid, sizeof(BigElement) * 8 - 1,
2615 sizeof(Element) * 8) != 0) {
2616 if (srcElem1 < 0) {
2617 destElem = 0;
2618 } else {
2619 destElem = mask(sizeof(Element) * 8);
2620 }
2621 fpscr.qc = 1;
2622 } else {
2623 destElem = mid;
2624 }
2625 } else {
2626 destElem = srcElem1;
2627 }
2628 Fpscr = fpscr;
2629 '''
2630 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2631 smallSignedTypes, vqshrunsCode)
2632
# vqrshrn snippet for smallSignedTypes (saturating rounding shift right,
# narrowing).
# - imm > bit-width of srcElem1: result 0; fpscr.qc is set unless the source
#   is 0 or -1.
# - nonzero imm: shifts by (imm - 1), saves the low bit as rBit, shifts once
#   more, propagates the bit at (BigElement bits - 1 - imm) upward, then adds
#   rBit. If the value does not survive a round trip through Element the
#   result saturates to mask(Element bits - 1), complemented for a negative
#   source, and fpscr.qc is set.
# - imm == 0: applies the same round-trip check and saturation to the
#   unshifted source.
2633 vqrshrnCode = '''
2634 FPSCR fpscr = (FPSCR)Fpscr;
2635 if (imm > sizeof(srcElem1) * 8) {
2636 if (srcElem1 != 0 && srcElem1 != -1)
2637 fpscr.qc = 1;
2638 destElem = 0;
2639 } else if (imm) {
2640 BigElement mid = (srcElem1 >> (imm - 1));
2641 uint64_t rBit = mid & 0x1;
2642 mid >>= 1;
2643 mid |= -(mid & ((BigElement)1 <<
2644 (sizeof(BigElement) * 8 - 1 - imm)));
2645 mid += rBit;
2646 if (mid != (Element)mid) {
2647 destElem = mask(sizeof(Element) * 8 - 1);
2648 if (srcElem1 < 0)
2649 destElem = ~destElem;
2650 fpscr.qc = 1;
2651 } else {
2652 destElem = mid;
2653 }
2654 } else {
2655 if (srcElem1 != (Element)srcElem1) {
2656 destElem = mask(sizeof(Element) * 8 - 1);
2657 if (srcElem1 < 0)
2658 destElem = ~destElem;
2659 fpscr.qc = 1;
2660 } else {
2661 destElem = srcElem1;
2662 }
2663 }
2664 Fpscr = fpscr;
2665 '''
2666 twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2667 smallSignedTypes, vqrshrnCode)
2668
2669 vqrshrunCode = '''
2670 FPSCR fpscr = (FPSCR)Fpscr;
2671 if (imm > sizeof(srcElem1) * 8) {
2672 if (srcElem1 != 0)
2673 fpscr.qc = 1;
2674 destElem = 0;
2675 } else if (imm) {
2676 BigElement mid = (srcElem1 >> (imm - 1));
2677 uint64_t rBit = mid & 0x1;
2678 mid >>= 1;
2679 mid += rBit;
2680 if (mid != (Element)mid) {
2681 destElem = mask(sizeof(Element) * 8);
2682 fpscr.qc = 1;
2683 } else {
2684 destElem = mid;
2685 }
2686 } else {
2687 if (srcElem1 != (Element)srcElem1) {
2688 destElem = mask(sizeof(Element) * 8 - 1);
2370 mid |= -(mid & ((Element)1 <<
2371 (sizeof(Element) * 8 - 1 - imm)));
2372 }
2373 }
2374 destElem += mid;
2375 '''
2376 twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2377 twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2378
# Element code for NVrshrD / NVrshrQ (allTypes): right shift by imm with
# rounding.  rBit is bit (imm - 1) of the source and is added to the
# shifted value.  imm above the element width yields 0; imm == 0 copies
# the source.  The 2 / 4 argument is presumably the register count of
# the D / Q form -- confirm against twoRegShiftInst.
2379 vrshrCode = '''
2380 if (imm > sizeof(srcElem1) * 8) {
2381 destElem = 0;
2382 } else if (imm) {
2383 Element rBit = bits(srcElem1, imm - 1);
2384 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2385 } else {
2386 destElem = srcElem1;
2387 }
2388 '''
2389 twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2390 twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2391
# Element code for NVrsraD / NVrsraQ (allTypes): same rounded right
# shift as vrshrCode, but the result is added to the existing destElem
# instead of replacing it.  The trailing True is passed on to
# twoRegShiftInst (presumably it marks the destination as an input too
# -- confirm).
2392 vrsraCode = '''
2393 if (imm > sizeof(srcElem1) * 8) {
2394 destElem += 0;
2395 } else if (imm) {
2396 Element rBit = bits(srcElem1, imm - 1);
2397 destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2398 } else {
2399 destElem += srcElem1;
2400 }
2401 '''
2402 twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2403 twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2404
# Element code for NVsriD / NVsriQ (unsignedTypes): the source shifted
# right by imm is ORed with the destination bits selected by
# ~mask(sizeof(Element) * 8 - imm).  imm at or above the element width
# leaves destElem as it was.
2405 vsriCode = '''
2406 if (imm >= sizeof(Element) * 8)
2407 destElem = destElem;
2408 else
2409 destElem = (srcElem1 >> imm) |
2410 (destElem & ~mask(sizeof(Element) * 8 - imm));
2411 '''
2412 twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2413 twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2414
# Element code for NVshlD / NVshlQ (unsignedTypes): left shift by imm.
# For imm at or above the element width the shift is done in two steps,
# (width - 1) and then 1, rather than as a single shift by imm.
2415 vshlCode = '''
2416 if (imm >= sizeof(Element) * 8)
2417 destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2418 else
2419 destElem = srcElem1 << imm;
2420 '''
2421 twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2422 twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2423
# Element code for NVsliD / NVsliQ (unsignedTypes): the source shifted
# left by imm is ORed with the destination bits selected by mask(imm).
# imm at or above the element width leaves destElem as it was.
2424 vsliCode = '''
2425 if (imm >= sizeof(Element) * 8)
2426 destElem = destElem;
2427 else
2428 destElem = (srcElem1 << imm) | (destElem & mask(imm));
2429 '''
2430 twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2431 twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2432
# Element code for NVqshlD / NVqshlQ (signedTypes): saturating left
# shift.  topBits holds source bits [width - 1 : width - 1 - imm]; if
# they are neither all zero nor equal to mask(imm + 1) the result is
# replaced by (Element)1 << (width - 1), complemented for a positive
# source, and fpscr.qc is set.  The same saturation applies to any
# nonzero source when imm is at or above the element width.
2433 vqshlCode = '''
2434 FPSCR fpscr = (FPSCR)Fpscr;
2435 if (imm >= sizeof(Element) * 8) {
2436 if (srcElem1 != 0) {
2437 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2438 if (srcElem1 > 0)
2439 destElem = ~destElem;
2440 fpscr.qc = 1;
2441 } else {
2442 destElem = 0;
2443 }
2444 } else if (imm) {
2445 destElem = (srcElem1 << imm);
2446 uint64_t topBits = bits((uint64_t)srcElem1,
2447 sizeof(Element) * 8 - 1,
2448 sizeof(Element) * 8 - 1 - imm);
2449 if (topBits != 0 && topBits != mask(imm + 1)) {
2450 destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2451 if (srcElem1 > 0)
2452 destElem = ~destElem;
2453 fpscr.qc = 1;
2454 }
2455 } else {
2456 destElem = srcElem1;
2457 }
2458 Fpscr = fpscr;
2459 '''
2460 twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2461 twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2462
# Element code for NVqshluD / NVqshluQ (unsignedTypes): saturating left
# shift.  If any of source bits [width - 1 : width - imm] is set, or the
# source is nonzero and imm is at or above the element width, the result
# is mask(sizeof(Element) * 8) and fpscr.qc is set.
2463 vqshluCode = '''
2464 FPSCR fpscr = (FPSCR)Fpscr;
2465 if (imm >= sizeof(Element) * 8) {
2466 if (srcElem1 != 0) {
2467 destElem = mask(sizeof(Element) * 8);
2468 fpscr.qc = 1;
2469 } else {
2470 destElem = 0;
2471 }
2472 } else if (imm) {
2473 destElem = (srcElem1 << imm);
2474 uint64_t topBits = bits((uint64_t)srcElem1,
2475 sizeof(Element) * 8 - 1,
2476 sizeof(Element) * 8 - imm);
2477 if (topBits != 0) {
2478 destElem = mask(sizeof(Element) * 8);
2479 fpscr.qc = 1;
2480 }
2481 } else {
2482 destElem = srcElem1;
2483 }
2484 Fpscr = fpscr;
2485 '''
2486 twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2487 twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2488
# Element code for NVqshlusD / NVqshlusQ (signedTypes): saturating left
# shift of a signed source.  In every branch a negative source gives 0
# with fpscr.qc set.  A non-negative source that overflows (topBits
# nonzero, or nonzero source with imm at or above the element width)
# gives mask(sizeof(Element) * 8) with fpscr.qc set.
2489 vqshlusCode = '''
2490 FPSCR fpscr = (FPSCR)Fpscr;
2491 if (imm >= sizeof(Element) * 8) {
2492 if (srcElem1 < 0) {
2493 destElem = 0;
2494 fpscr.qc = 1;
2495 } else if (srcElem1 > 0) {
2496 destElem = mask(sizeof(Element) * 8);
2497 fpscr.qc = 1;
2498 } else {
2499 destElem = 0;
2500 }
2501 } else if (imm) {
2502 destElem = (srcElem1 << imm);
2503 uint64_t topBits = bits((uint64_t)srcElem1,
2504 sizeof(Element) * 8 - 1,
2505 sizeof(Element) * 8 - imm);
2506 if (srcElem1 < 0) {
2507 destElem = 0;
2508 fpscr.qc = 1;
2509 } else if (topBits != 0) {
2510 destElem = mask(sizeof(Element) * 8);
2511 fpscr.qc = 1;
2512 }
2513 } else {
2514 if (srcElem1 < 0) {
2515 fpscr.qc = 1;
2516 destElem = 0;
2517 } else {
2518 destElem = srcElem1;
2519 }
2520 }
2521 Fpscr = fpscr;
2522 '''
2523 twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2524 twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2525
# Element code for NVshrn (smallUnsignedTypes): plain right shift by
# imm; imm at or above the source width yields 0.
2526 vshrnCode = '''
2527 if (imm >= sizeof(srcElem1) * 8) {
2528 destElem = 0;
2529 } else {
2530 destElem = srcElem1 >> imm;
2531 }
2532 '''
2533 twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2534
# Element code for NVrshrn (smallUnsignedTypes): right shift by imm with
# rounding.  rBit is bit (imm - 1) of the source and is added after the
# shift.  imm above the source width yields 0; imm == 0 copies the
# source.
2535 vrshrnCode = '''
2536 if (imm > sizeof(srcElem1) * 8) {
2537 destElem = 0;
2538 } else if (imm) {
2539 Element rBit = bits(srcElem1, imm - 1);
2540 destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2541 } else {
2542 destElem = srcElem1;
2543 }
2544 '''
2545 twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2546
# Element code for NVqshrn (smallSignedTypes): saturating right shift.
# After the shift, the bit at position sizeof(BigElement) * 8 - 1 - imm
# is copied into all higher bits of mid.  If mid changes under the cast
# to Element the result is mask(sizeof(Element) * 8 - 1), complemented
# for a negative source, and fpscr.qc is set.  imm == 0 copies the
# source without a saturation check.
2547 vqshrnCode = '''
2548 FPSCR fpscr = (FPSCR)Fpscr;
2549 if (imm > sizeof(srcElem1) * 8) {
2550 if (srcElem1 != 0 && srcElem1 != -1)
2551 fpscr.qc = 1;
2552 destElem = 0;
2553 } else if (imm) {
2554 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2555 mid |= -(mid & ((BigElement)1 <<
2556 (sizeof(BigElement) * 8 - 1 - imm)));
2557 if (mid != (Element)mid) {
2558 destElem = mask(sizeof(Element) * 8 - 1);
2559 if (srcElem1 < 0)
2560 destElem = ~destElem;
2561 fpscr.qc = 1;
2562 } else {
2563 destElem = mid;
2564 }
2565 } else {
2566 destElem = srcElem1;
2567 }
2568 Fpscr = fpscr;
2569 '''
2570 twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2571
# Element code for NVqshrun (registered for smallUnsignedTypes): right
# shift by imm with saturation.  fpscr.qc is set when a nonzero source is
# shifted by more than its width, or when the shifted value changes under
# the cast to Element; in the latter case the result is
# mask(sizeof(Element) * 8).  imm == 0 copies the source unchanged.
2572 vqshrunCode = '''
2573 FPSCR fpscr = (FPSCR)Fpscr;
2574 if (imm > sizeof(srcElem1) * 8) {
2575 if (srcElem1 != 0)
2576 fpscr.qc = 1;
2577 destElem = 0;
2578 } else if (imm) {
2579 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2580 if (mid != (Element)mid) {
2581 destElem = mask(sizeof(Element) * 8);
2582 fpscr.qc = 1;
2583 } else {
2584 destElem = mid;
2585 }
2586 } else {
2587 destElem = srcElem1;
2588 }
2589 Fpscr = fpscr;
2590 '''
2591 twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2592 smallUnsignedTypes, vqshrunCode)
2593
# Element code for NVqshruns (mnemonic "vqshrun", registered for
# smallSignedTypes).  After the shift, any set bit between
# sizeof(Element) * 8 and the top of BigElement triggers saturation:
# the result becomes 0 for a negative source, mask(sizeof(Element) * 8)
# otherwise, and fpscr.qc is set.
2594 vqshrunsCode = '''
2595 FPSCR fpscr = (FPSCR)Fpscr;
2596 if (imm > sizeof(srcElem1) * 8) {
2597 if (srcElem1 != 0)
2598 fpscr.qc = 1;
2599 destElem = 0;
2600 } else if (imm) {
2601 BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2602 if (bits(mid, sizeof(BigElement) * 8 - 1,
2603 sizeof(Element) * 8) != 0) {
2604 if (srcElem1 < 0) {
2605 destElem = 0;
2606 } else {
2607 destElem = mask(sizeof(Element) * 8);
2608 }
2609 fpscr.qc = 1;
2610 } else {
2611 destElem = mid;
2612 }
2613 } else {
2614 destElem = srcElem1;
2615 }
2616 Fpscr = fpscr;
2617 '''
2618 twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2619 smallSignedTypes, vqshrunsCode)
2620
# Element code for NVqrshrn (smallSignedTypes): rounding, saturating
# right shift.  rBit is the last bit shifted out and is added back after
# the shift.  The "mid |= -(mid & ...)" line copies the bit at position
# sizeof(BigElement) * 8 - 1 - imm into every higher bit (a manual sign
# extension -- presumably because the shift is not relied on to be
# arithmetic; confirm).  When the value changes under the cast to
# Element the result is mask(sizeof(Element) * 8 - 1), complemented for
# a negative source, and fpscr.qc is set.
2621 vqrshrnCode = '''
2622 FPSCR fpscr = (FPSCR)Fpscr;
2623 if (imm > sizeof(srcElem1) * 8) {
2624 if (srcElem1 != 0 && srcElem1 != -1)
2625 fpscr.qc = 1;
2626 destElem = 0;
2627 } else if (imm) {
2628 BigElement mid = (srcElem1 >> (imm - 1));
2629 uint64_t rBit = mid & 0x1;
2630 mid >>= 1;
2631 mid |= -(mid & ((BigElement)1 <<
2632 (sizeof(BigElement) * 8 - 1 - imm)));
2633 mid += rBit;
2634 if (mid != (Element)mid) {
2635 destElem = mask(sizeof(Element) * 8 - 1);
2636 if (srcElem1 < 0)
2637 destElem = ~destElem;
2638 fpscr.qc = 1;
2639 } else {
2640 destElem = mid;
2641 }
2642 } else {
2643 if (srcElem1 != (Element)srcElem1) {
2644 destElem = mask(sizeof(Element) * 8 - 1);
2645 if (srcElem1 < 0)
2646 destElem = ~destElem;
2647 fpscr.qc = 1;
2648 } else {
2649 destElem = srcElem1;
2650 }
2651 }
2652 Fpscr = fpscr;
2653 '''
2654 twoRegNarrowShiftInst("vqrshrun", "NVqrshrn",
2655 smallSignedTypes, vqrshrnCode)
2656
# Element code for NVqrshrun (registered for smallUnsignedTypes):
# rounding, saturating right shift of an unsigned source.
#  - imm above the source width: result 0, fpscr.qc set for a nonzero
#    source.
#  - imm != 0: rBit is the last bit shifted out and is added back after
#    the shift; if the sum changes under the cast to Element the result
#    saturates to mask(sizeof(Element) * 8) and fpscr.qc is set.
#  - imm == 0: the source is only checked against the Element range.
# The source is unsigned here, so there is no negative case to handle
# and every saturating path uses the same all-ones Element value.
vqrshrunCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    if (imm > sizeof(srcElem1) * 8) {
        if (srcElem1 != 0)
            fpscr.qc = 1;
        destElem = 0;
    } else if (imm) {
        BigElement mid = (srcElem1 >> (imm - 1));
        uint64_t rBit = mid & 0x1;
        mid >>= 1;
        mid += rBit;
        if (mid != (Element)mid) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = mid;
        }
    } else {
        if (srcElem1 != (Element)srcElem1) {
            destElem = mask(sizeof(Element) * 8);
            fpscr.qc = 1;
        } else {
            destElem = srcElem1;
        }
    }
    Fpscr = fpscr;
'''
twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
                      smallUnsignedTypes, vqrshrunCode)
2700
# Element code for NVqrshruns (mnemonic "vqrshrun", registered for
# smallSignedTypes): rounding, saturating right shift of a signed source.
# rBit is the last bit shifted out; after the manual sign extension of
# mid it is added back.  Any set bit between sizeof(Element) * 8 and the
# top of BigElement saturates the result to 0 (negative source) or
# mask(sizeof(Element) * 8) and sets fpscr.qc.  With imm == 0 a negative
# source gives 0 with fpscr.qc set.
2701 vqrshrunsCode = '''
2702 FPSCR fpscr = (FPSCR)Fpscr;
2703 if (imm > sizeof(srcElem1) * 8) {
2704 if (srcElem1 != 0)
2705 fpscr.qc = 1;
2706 destElem = 0;
2707 } else if (imm) {
2708 BigElement mid = (srcElem1 >> (imm - 1));
2709 uint64_t rBit = mid & 0x1;
2710 mid >>= 1;
2711 mid |= -(mid & ((BigElement)1 <<
2712 (sizeof(BigElement) * 8 - 1 - imm)));
2713 mid += rBit;
2714 if (bits(mid, sizeof(BigElement) * 8 - 1,
2715 sizeof(Element) * 8) != 0) {
2716 if (srcElem1 < 0) {
2717 destElem = 0;
2718 } else {
2719 destElem = mask(sizeof(Element) * 8);
2720 }
2721 fpscr.qc = 1;
2722 } else {
2723 destElem = mid;
2724 }
2725 } else {
2726 if (srcElem1 < 0) {
2727 fpscr.qc = 1;
2728 destElem = 0;
2729 } else {
2730 destElem = srcElem1;
2731 }
2732 }
2733 Fpscr = fpscr;
2734 '''
2735 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2736 smallSignedTypes, vqrshrunsCode)
2737
# Element code for NVshll (smallTypes): the source is cast to BigElement
# and shifted left by imm; imm at or above the destination width
# yields 0.
2738 vshllCode = '''
2739 if (imm >= sizeof(destElem) * 8) {
2740 destElem = 0;
2741 } else {
2742 destElem = (BigElement)srcElem1 << imm;
2743 }
2744 '''
2745 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2746
# Element code for NVmovl (smallTypes): plain element copy, generated
# through the same twoRegLongShiftInst helper as vshll.
2747 vmovlCode = '''
2748 destElem = srcElem1;
2749 '''
2750 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2751
# Element code for NVcvt2ufxD / NVcvt2ufxQ ("float" only, toInt = True):
# converts through vfpFpSToFixed(srcElem1, false, false, imm).
# fpscr.idc is set when flushToZero(srcElem1) returns true.  The
# conversion runs between prepFpState(VfpRoundNearest) and finishVfp();
# the empty asm statements only name srcElem1 / destReg as memory
# operands -- presumably to keep the compiler from moving the conversion
# across those calls (confirm).
2752 vcvt2ufxCode = '''
2753 FPSCR fpscr = Fpscr;
2754 if (flushToZero(srcElem1))
2755 fpscr.idc = 1;
2756 VfpSavedState state = prepFpState(VfpRoundNearest);
2757 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2758 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2759 __asm__ __volatile__("" :: "m" (destReg));
2760 finishVfp(fpscr, state, true);
2761 Fpscr = fpscr;
2762 '''
2763 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2764 2, vcvt2ufxCode, toInt = True)
2765 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2766 4, vcvt2ufxCode, toInt = True)
2767
# Element code for NVcvt2sfxD / NVcvt2sfxQ: identical to vcvt2ufxCode
# except that the second argument of vfpFpSToFixed is true (presumably
# selecting a signed result -- confirm against vfpFpSToFixed).
2768 vcvt2sfxCode = '''
2769 FPSCR fpscr = Fpscr;
2770 if (flushToZero(srcElem1))
2771 fpscr.idc = 1;
2772 VfpSavedState state = prepFpState(VfpRoundNearest);
2773 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2774 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2775 __asm__ __volatile__("" :: "m" (destReg));
2776 finishVfp(fpscr, state, true);
2777 Fpscr = fpscr;
2778 '''
2779 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2780 2, vcvt2sfxCode, toInt = True)
2781 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2782 4, vcvt2sfxCode, toInt = True)
2783
# Element code for NVcvtu2fpD / NVcvtu2fpQ ("float" only, fromInt =
# True): converts srcReg1 through vfpUFixedToFpS(true, true, srcReg1,
# false, imm) between prepFpState(VfpRoundNearest) and finishVfp().
2784 vcvtu2fpCode = '''
2785 FPSCR fpscr = Fpscr;
2786 VfpSavedState state = prepFpState(VfpRoundNearest);
2787 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2788 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2789 __asm__ __volatile__("" :: "m" (destElem));
2790 finishVfp(fpscr, state, true);
2791 Fpscr = fpscr;
2792 '''
2793 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2794 2, vcvtu2fpCode, fromInt = True)
2795 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2796 4, vcvtu2fpCode, fromInt = True)
2797
# Element code for NVcvts2fpD / NVcvts2fpQ: same sequence as
# vcvtu2fpCode but calling vfpSFixedToFpS instead of vfpUFixedToFpS.
2798 vcvts2fpCode = '''
2799 FPSCR fpscr = Fpscr;
2800 VfpSavedState state = prepFpState(VfpRoundNearest);
2801 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2802 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2803 __asm__ __volatile__("" :: "m" (destElem));
2804 finishVfp(fpscr, state, true);
2805 Fpscr = fpscr;
2806 '''
2807 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2808 2, vcvts2fpCode, fromInt = True)
2809 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2810 4, vcvts2fpCode, fromInt = True)
2811
# Element code for NVcvts2h (registered with "uint16_t"): the source
# element is reinterpreted as a float via bitsToFp(), fpscr.idc is set
# when flushToZero() reports a flush, and the value is converted with
# vcvtFpSFpH() using VfpRoundNearest and fpscr.ahp.
2812 vcvts2hCode = '''
2813 FPSCR fpscr = Fpscr;
2814 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2815 if (flushToZero(srcFp1))
2816 fpscr.idc = 1;
2817 VfpSavedState state = prepFpState(VfpRoundNearest);
2818 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2819 : "m" (srcFp1), "m" (destElem));
2820 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2821 fpscr.ahp, srcFp1);
2822 __asm__ __volatile__("" :: "m" (destElem));
2823 finishVfp(fpscr, state, true);
2824 Fpscr = fpscr;
2825 '''
2826 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2827
# Element code for NVcvth2s (registered with "uint16_t"): the source
# element is converted with vcvtFpHFpS() and the float result is stored
# as raw bits through fpToBits().
2828 vcvth2sCode = '''
2829 FPSCR fpscr = Fpscr;
2830 VfpSavedState state = prepFpState(VfpRoundNearest);
2831 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2832 : "m" (srcElem1), "m" (destElem));
2833 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2834 __asm__ __volatile__("" :: "m" (destElem));
2835 finishVfp(fpscr, state, true);
2836 Fpscr = fpscr;
2837 '''
2838 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2839
# Element code for NVrsqrteD / NVrsqrteQ ("uint32_t" only): delegates to
# unsignedRSqrtEstimate().
2840 vrsqrteCode = '''
2841 destElem = unsignedRSqrtEstimate(srcElem1);
2842 '''
2843 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2844 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2845
# Register code for NVrsqrteDFp / NVrsqrteQFp ("float" only): sets
# fpscr.idc when flushToZero(srcReg1) reports a flush, then delegates to
# fprSqrtEstimate().
2846 vrsqrtefpCode = '''
2847 FPSCR fpscr = Fpscr;
2848 if (flushToZero(srcReg1))
2849 fpscr.idc = 1;
2850 destReg = fprSqrtEstimate(fpscr, srcReg1);
2851 Fpscr = fpscr;
2852 '''
2853 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2854 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2855
# Element code for NVrecpeD / NVrecpeQ ("uint32_t" only): delegates to
# unsignedRecipEstimate().
2856 vrecpeCode = '''
2857 destElem = unsignedRecipEstimate(srcElem1);
2858 '''
2859 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2860 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2861
# Register code for NVrecpeDFp / NVrecpeQFp ("float" only): sets
# fpscr.idc when flushToZero(srcReg1) reports a flush, then delegates to
# fpRecipEstimate().
2862 vrecpefpCode = '''
2863 FPSCR fpscr = Fpscr;
2864 if (flushToZero(srcReg1))
2865 fpscr.idc = 1;
2866 destReg = fpRecipEstimate(fpscr, srcReg1);
2867 Fpscr = fpscr;
2868 '''
2869 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2870 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2871
# Element code for the vrev16 / vrev32 / vrev64 families.  Each variant
# copies the element and sets j = i ^ (groupSize - 1), where groupSize is
# the group size in bytes (1 << 1, 1 << 2 or 1 << 3) divided by
# sizeof(Element).  i and j are declared outside this snippet;
# presumably j is the destination index used by twoRegMiscInst --
# confirm against that template.
2872 vrev16Code = '''
2873 destElem = srcElem1;
2874 unsigned groupSize = ((1 << 1) / sizeof(Element));
2875 unsigned reverseMask = (groupSize - 1);
2876 j = i ^ reverseMask;
2877 '''
2878 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2879 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
# Same as vrev16Code with a 4-byte group (1 << 2).
2880 vrev32Code = '''
2881 destElem = srcElem1;
2882 unsigned groupSize = ((1 << 2) / sizeof(Element));
2883 unsigned reverseMask = (groupSize - 1);
2884 j = i ^ reverseMask;
2885 '''
2886 twoRegMiscInst("vrev32", "NVrev32D",
2887 ("uint8_t", "uint16_t"), 2, vrev32Code)
2888 twoRegMiscInst("vrev32", "NVrev32Q",
2889 ("uint8_t", "uint16_t"), 4, vrev32Code)
# Same as vrev16Code with an 8-byte group (1 << 3).
2890 vrev64Code = '''
2891 destElem = srcElem1;
2892 unsigned groupSize = ((1 << 3) / sizeof(Element));
2893 unsigned reverseMask = (groupSize - 1);
2894 j = i ^ reverseMask;
2895 '''
2896 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2897 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2898
# Element code for NVpaddlD / NVpaddlQ (smallTypes): both source
# elements are cast to BigElement before being added, so the sum is
# formed at the wider width.
2899 vpaddlCode = '''
2900 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2901 '''
2902 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2903 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2904
# Element code for NVpadalD / NVpadalQ (smallTypes): same widened sum as
# vpaddlCode, accumulated into the existing destElem.
2905 vpadalCode = '''
2906 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2907 '''
2908 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2909 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2910
# Element code for NVclsD / NVclsQ (signedTypes): counts how many bits
# directly below the top bit have the same value as the top bit.  The
# source is shifted left once to drop the top bit, then shifted again
# while the sign test still matches; the count is capped at
# sizeof(Element) * 8 - 1.
2911 vclsCode = '''
2912 unsigned count = 0;
2913 if (srcElem1 < 0) {
2914 srcElem1 <<= 1;
2915 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2916 count++;
2917 srcElem1 <<= 1;
2918 }
2919 } else {
2920 srcElem1 <<= 1;
2921 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2922 count++;
2923 srcElem1 <<= 1;
2924 }
2925 }
2926 destElem = count;
2927 '''
2928 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2929 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2930
# Element code for NVclzD / NVclzQ (signedTypes): counts leading zero
# bits by shifting left while the value tests non-negative.  The count
# is capped at sizeof(Element) * 8, which is the result for a zero
# source.
2931 vclzCode = '''
2932 unsigned count = 0;
2933 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2934 count++;
2935 srcElem1 <<= 1;
2936 }
2937 destElem = count;
2938 '''
2939 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2940 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2941
# Element code for NVcntD / NVcntQ (unsignedTypes): adds up the low bit
# of the source while shifting it right.  The loop ends once the source
# is zero or sizeof(Element) * 8 has been added to count.
2942 vcntCode = '''
2943 unsigned count = 0;
2944 while (srcElem1 && count < sizeof(Element) * 8) {
2945 count += srcElem1 & 0x1;
2946 srcElem1 >>= 1;
2947 }
2948 destElem = count;
2949 '''
2950 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2951 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2952
# Element code for NVmvnD / NVmvnQ ("uint64_t" only): bitwise complement
# of the source element.  Note that the name vmvnCode is rebound further
# down for the immediate form.
2953 vmvnCode = '''
2954 destElem = ~srcElem1;
2955 '''
2956 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2957 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2958
# Element code for NVqabsD / NVqabsQ (signedTypes): saturating absolute
# value.  A source equal to (Element)1 << (width - 1) cannot be negated,
# so the result is its complement and fpscr.qc is set; otherwise
# negative sources are negated and the rest are copied.
2959 vqabsCode = '''
2960 FPSCR fpscr = (FPSCR)Fpscr;
2961 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2962 fpscr.qc = 1;
2963 destElem = ~srcElem1;
2964 } else if (srcElem1 < 0) {
2965 destElem = -srcElem1;
2966 } else {
2967 destElem = srcElem1;
2968 }
2969 Fpscr = fpscr;
2970 '''
2971 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2972 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2973
# Element code for NVqnegD / NVqnegQ (signedTypes): saturating negate.
# A source equal to (Element)1 << (width - 1) yields its complement with
# fpscr.qc set; every other source is negated.
2974 vqnegCode = '''
2975 FPSCR fpscr = (FPSCR)Fpscr;
2976 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2977 fpscr.qc = 1;
2978 destElem = ~srcElem1;
2979 } else {
2980 destElem = -srcElem1;
2981 }
2982 Fpscr = fpscr;
2983 '''
2984 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2985 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2986
# Element code for NVabsD / NVabsQ (signedTypes): negative sources are
# negated, others copied.  No saturation or fpscr update is done here.
2987 vabsCode = '''
2988 if (srcElem1 < 0) {
2989 destElem = -srcElem1;
2990 } else {
2991 destElem = srcElem1;
2992 }
2993 '''
2994 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2995 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
# Register code for NVabsDFp / NVabsQFp ("float" only): the float is
# viewed as a uint32_t through a union and ANDed with
# mask(sizeof(Element) * 8 - 1), which drops the top bit of the
# representation.
2996 vabsfpCode = '''
2997 union
2998 {
2999 uint32_t i;
3000 float f;
3001 } cStruct;
3002 cStruct.f = srcReg1;
3003 cStruct.i &= mask(sizeof(Element) * 8 - 1);
3004 destReg = cStruct.f;
3005 '''
3006 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
3007 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
3008
# Element code for NVnegD / NVnegQ (signedTypes): arithmetic negation of
# each element.
3009 vnegCode = '''
3010 destElem = -srcElem1;
3011 '''
3012 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
3013 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
# Register code for NVnegDFp / NVnegQFp ("float" only): negation of the
# floating point register value.
3014 vnegfpCode = '''
3015 destReg = -srcReg1;
3016 '''
3017 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
3018 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3019
# Integer compare-with-zero for NVcgtD / NVcgtQ (signedTypes): the
# result is mask(sizeof(Element) * 8) when the source is > 0, else 0.
3020 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3021 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3022 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
# Floating point form: binaryOp() runs vcgtFunc on the source and 0.0.
# A result of 0 is mapped to -1 and any other result to 0; a result of
# 2.0 additionally sets fpscr.ioc (presumably vcgtFunc's way of flagging
# an invalid operand -- confirm against vcgtFunc).
3023 vcgtfpCode = '''
3024 FPSCR fpscr = (FPSCR)Fpscr;
3025 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3026 true, true, VfpRoundNearest);
3027 destReg = (res == 0) ? -1 : 0;
3028 if (res == 2.0)
3029 fpscr.ioc = 1;
3030 Fpscr = fpscr;
3031 '''
3032 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3033 2, vcgtfpCode, toInt = True)
3034 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3035 4, vcgtfpCode, toInt = True)
3036
# Integer compare-with-zero for NVcgeD / NVcgeQ (signedTypes): the
# result is mask(sizeof(Element) * 8) when the source is >= 0, else 0.
3037 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3038 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3039 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
# Floating point form: binaryOp() runs vcgeFunc on the source and 0.0.
# A result of 0 is mapped to -1 and any other result to 0; a result of
# 2.0 additionally sets fpscr.ioc (presumably vcgeFunc's way of flagging
# an invalid operand -- confirm against vcgeFunc).
3040 vcgefpCode = '''
3041 FPSCR fpscr = (FPSCR)Fpscr;
3042 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3043 true, true, VfpRoundNearest);
3044 destReg = (res == 0) ? -1 : 0;
3045 if (res == 2.0)
3046 fpscr.ioc = 1;
3047 Fpscr = fpscr;
3048 '''
3049 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3050 2, vcgefpCode, toInt = True)
3051 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3052 4, vcgefpCode, toInt = True)
3053
# Integer compare-with-zero for NVceqD / NVceqQ (signedTypes): the
# result is mask(sizeof(Element) * 8) when the source is 0, else 0.
3054 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3055 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3056 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
# Floating point form: binaryOp() runs vceqFunc on the source and 0.0.
# A result of 0 is mapped to -1 and any other result to 0; a result of
# 2.0 additionally sets fpscr.ioc (presumably vceqFunc's way of flagging
# an invalid operand -- confirm against vceqFunc).
3057 vceqfpCode = '''
3058 FPSCR fpscr = (FPSCR)Fpscr;
3059 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3060 true, true, VfpRoundNearest);
3061 destReg = (res == 0) ? -1 : 0;
3062 if (res == 2.0)
3063 fpscr.ioc = 1;
3064 Fpscr = fpscr;
3065 '''
3066 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3067 2, vceqfpCode, toInt = True)
3068 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3069 4, vceqfpCode, toInt = True)
3070
# Integer compare-with-zero for NVcleD / NVcleQ (signedTypes): the
# result is mask(sizeof(Element) * 8) when the source is <= 0, else 0.
3071 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3072 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3073 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
# Floating point form: binaryOp() runs vcleFunc on the source and 0.0.
# A result of 0 is mapped to -1 and any other result to 0; a result of
# 2.0 additionally sets fpscr.ioc (presumably vcleFunc's way of flagging
# an invalid operand -- confirm against vcleFunc).
3074 vclefpCode = '''
3075 FPSCR fpscr = (FPSCR)Fpscr;
3076 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3077 true, true, VfpRoundNearest);
3078 destReg = (res == 0) ? -1 : 0;
3079 if (res == 2.0)
3080 fpscr.ioc = 1;
3081 Fpscr = fpscr;
3082 '''
3083 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3084 2, vclefpCode, toInt = True)
3085 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3086 4, vclefpCode, toInt = True)
3087
# Integer compare-with-zero for NVcltD / NVcltQ (signedTypes): the
# result is mask(sizeof(Element) * 8) when the source is < 0, else 0.
3088 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3089 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3090 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
# Floating point form: binaryOp() runs vcltFunc on the source and 0.0.
# A result of 0 is mapped to -1 and any other result to 0; a result of
# 2.0 additionally sets fpscr.ioc (presumably vcltFunc's way of flagging
# an invalid operand -- confirm against vcltFunc).
3091 vcltfpCode = '''
3092 FPSCR fpscr = (FPSCR)Fpscr;
3093 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3094 true, true, VfpRoundNearest);
3095 destReg = (res == 0) ? -1 : 0;
3096 if (res == 2.0)
3097 fpscr.ioc = 1;
3098 Fpscr = fpscr;
3099 '''
3100 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3101 2, vcltfpCode, toInt = True)
3102 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3103 4, vcltfpCode, toInt = True)
3104
# Whole-vector code for NVswpD / NVswpQ ("uint64_t" only): exchanges
# regs[r] of srcReg1 and destReg for every r below rCount, using mid as
# the temporary.  Both vectors are modified.
3105 vswpCode = '''
3106 FloatRegBits mid;
3107 for (unsigned r = 0; r < rCount; r++) {
3108 mid = srcReg1.regs[r];
3109 srcReg1.regs[r] = destReg.regs[r];
3110 destReg.regs[r] = mid;
3111 }
3112 '''
3113 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3114 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3115
# Whole-vector code for NVtrnD / NVtrnQ (unsignedTypes): for every even
# index i, element i of srcReg1 is exchanged with element i + 1 of
# destReg.  Both vectors are modified.
3116 vtrnCode = '''
3117 Element mid;
3118 for (unsigned i = 0; i < eCount; i += 2) {
3119 mid = srcReg1.elements[i];
3120 srcReg1.elements[i] = destReg.elements[i + 1];
3121 destReg.elements[i + 1] = mid;
3122 }
3123 '''
3124 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3125 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3126
# Whole-vector code for NVuzpD / NVuzpQ (unsignedTypes).  mid is a copy
# of the original srcReg1.  After the two loops:
#   destReg = even elements of destReg, then even elements of mid
#   srcReg1 = odd elements of destReg, then odd elements of mid
# destReg is compacted in place, which is safe because index 2 * i is
# read before any index at or above i has been overwritten.
3127 vuzpCode = '''
3128 Element mid[eCount];
3129 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3130 for (unsigned i = 0; i < eCount / 2; i++) {
3131 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3132 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3133 destReg.elements[i] = destReg.elements[2 * i];
3134 }
3135 for (unsigned i = 0; i < eCount / 2; i++) {
3136 destReg.elements[eCount / 2 + i] = mid[2 * i];
3137 }
3138 '''
3139 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3140 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3141
# Whole-vector code for NVzipD / NVzipQ (unsignedTypes).  mid is a copy
# of the original destReg.  After the two loops:
#   destReg = lower half of mid interleaved with lower half of srcReg1
#   srcReg1 = upper half of mid interleaved with upper half of srcReg1
# srcReg1 is rewritten in place in the second loop; that is safe because
# index eCount / 2 + i is read no later than the step that overwrites it.
# Both loops use an unsigned index, like every other element loop in this
# file, so the comparison against eCount / 2 does not mix signedness.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
'''
twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3156
# Element code for NVmovn (smallUnsignedTypes): plain element copy
# generated through twoRegNarrowMiscInst.
3157 vmovnCode = 'destElem = srcElem1;'
3158 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3159
# Element code for NVdupD / NVdupQ (smallUnsignedTypes): plain element
# copy generated through twoRegMiscScInst.
3160 vdupCode = 'destElem = srcElem1;'
3161 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3162 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3163
# Generate class "Name" for mnemonic "name".  The emitted code stores
# htog((Element)Op1) into every element of a RegVect and then writes
# the first rCount registers of that vector to FpDestP0..FpDestP<n>
# through gtoh().  The declaration goes to header_output, the execute
# body to exec_output, followed by one NeonExecDeclare instantiation
# per entry in "types".
def vdupGprInst(name, Name, types, rCount):
    global header_output, exec_output
    # Fill the whole destination vector with the converted operand.
    fillCode = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register.
    writeBack = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''
    eWalkCode = fillCode + ''.join([writeBack % { "reg" : idx }
                                    for idx in range(rCount)])
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegOp", params, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3189
# Immediate forms, all registered through oneRegImmInst for "uint64_t"
# only.  vmov stores the immediate itself.
3190 vmovCode = 'destElem = imm;'
3191 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3192 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3193
# vorr ORs the immediate into the existing destination; the extra True
# is passed on to oneRegImmInst (presumably marking the destination as
# an input -- confirm).
3194 vorrCode = 'destElem |= imm;'
3195 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3196 oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3197
# vmvn stores the complement of the immediate.  This rebinds vmvnCode,
# which was used for the register form earlier.
3198 vmvnCode = 'destElem = ~imm;'
3199 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3200 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3201
# vbic clears the immediate's bits in the existing destination.
3202 vbicCode = 'destElem &= ~imm;'
3203 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3204 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3205
# Element code for NVqmovn (smallSignedTypes): saturating narrow.  The
# source is assigned to destElem and cast back to BigElement; if the
# round trip changes the value, fpscr.qc is set and the result becomes
# mask(sizeof(Element) * 8 - 1), complemented for a negative source.
3206 vqmovnCode = '''
3207 FPSCR fpscr = (FPSCR)Fpscr;
3208 destElem = srcElem1;
3209 if ((BigElement)destElem != srcElem1) {
3210 fpscr.qc = 1;
3211 destElem = mask(sizeof(Element) * 8 - 1);
3212 if (srcElem1 < 0)
3213 destElem = ~destElem;
3214 }
3215 Fpscr = fpscr;
3216 '''
3217 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3218
# Element code for NVqmovun (smallUnsignedTypes): saturating narrow.  If
# the value does not survive the round trip through Element, fpscr.qc is
# set and the result becomes mask(sizeof(Element) * 8).
3219 vqmovunCode = '''
3220 FPSCR fpscr = (FPSCR)Fpscr;
3221 destElem = srcElem1;
3222 if ((BigElement)destElem != srcElem1) {
3223 fpscr.qc = 1;
3224 destElem = mask(sizeof(Element) * 8);
3225 }
3226 Fpscr = fpscr;
3227 '''
3228 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3229 smallUnsignedTypes, vqmovunCode)
3230
# Element code for NVqmovuns (mnemonic "vqmovun", registered for
# smallSignedTypes): saturating narrow of a signed source.  A negative
# source, or one whose low sizeof(Element) * 8 bits differ from the
# full value, sets fpscr.qc; the result is then
# mask(sizeof(Element) * 8), complemented when the source is negative.
3231 vqmovunsCode = '''
3232 FPSCR fpscr = (FPSCR)Fpscr;
3233 destElem = srcElem1;
3234 if (srcElem1 < 0 ||
3235 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3236 fpscr.qc = 1;
3237 destElem = mask(sizeof(Element) * 8);
3238 if (srcElem1 < 0)
3239 destElem = ~destElem;
3240 }
3241 Fpscr = fpscr;
3242 '''
3243 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3244 smallSignedTypes, vqmovunsCode)
3245
# Generate class "Name" for mnemonic "name" around the vector snippet
# "op".  The emitted code loads rCount registers of each source vector
# from FpOp1P<n> / FpOp2P<n> through htog(), runs "op", and writes
# rCount registers of destReg to FpDestP<n> through gtoh().  The
# declaration goes to header_output, the execute body to exec_output,
# followed by one NeonExecDeclare instantiation per entry in "types".
def buildVext(name, Name, types, rCount, op):
    global header_output, exec_output
    # Per-register templates for reading the sources and storing the result.
    loadRegs = '''
    srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
    srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
    '''
    storeReg = '''
    FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
    '''
    pieces = ['''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    pieces.extend([loadRegs % { "reg" : idx } for idx in range(rCount)])
    pieces.append(op)
    pieces.extend([storeReg % { "reg" : idx } for idx in range(rCount)])
    eWalkCode = ''.join(pieces)
    params = { "code": eWalkCode,
               "r_count": rCount,
               "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", params, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
3272
# Vector snippet for NVextD / NVextQ ("uint8_t" only), passed to
# buildVext as "op".  Destination element i comes from index i + imm of
# srcReg1 while that index is below eCount, and from srcReg2 at
# (i + imm - eCount) otherwise.  The assert fires if that second index
# is still out of range.
3273 vextCode = '''
3274 for (unsigned i = 0; i < eCount; i++) {
3275 unsigned index = i + imm;
3276 if (index < eCount) {
3277 destReg.elements[i] = srcReg1.elements[index];
3278 } else {
3279 index -= eCount;
3280 assert(index < eCount);
3281 destReg.elements[i] = srcReg2.elements[index];
3282 }
3283 }
3284 '''
3285 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3286 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3287
# Generate class "Name" for mnemonic "name", a byte table lookup.
#   length - number of table entries of 8 bytes each; the first
#            length * 2 table registers are loaded from FpOp1P<n>, the
#            remaining ones (up to 8) are zeroed.
#   isVtbl - C++ boolean literal as a string ("true" / "false").  When
#            true an out-of-range index writes 0, otherwise the
#            destination byte is left unchanged.
# Each of the 8 index bytes in FpOp2P0 / FpOp2P1 selects one table byte
# for the matching destination byte.
def buildVtbxl(name, Name, length, isVtbl):
    global header_output, decoder_output, exec_output
    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0.uw);
    srcReg2.regs[1] = htog(FpOp2P1.uw);

    destReg.regs[0] = htog(FpDestP0.uw);
    destReg.regs[1] = htog(FpDestP1.uw);
    ''' % { "length" : length, "isVtbl" : isVtbl }
    # Table registers: loaded from the operand while inside the table,
    # zero-filled beyond it.
    tableInit = []
    for reg in range(8):
        if reg < length * 2:
            line = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n'
        else:
            line = 'table.regs[%(reg)d] = 0;\n'
        tableInit.append(line % { "reg" : reg })
    lookup = '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0.uw = gtoh(destReg.regs[0]);
    FpDestP1.uw = gtoh(destReg.regs[1]);
    '''
    code = prologue + ''.join(tableInit) + lookup
    params = { "code": code,
               "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegRegOp", params, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3340
# Instantiate the table lookups for table lengths 1 through 4.  The vtbl
# classes pass "true" for isVtbl, the vtbx classes pass "false".
3341 buildVtbxl("vtbl", "NVtbl1", 1, "true")
3342 buildVtbxl("vtbl", "NVtbl2", 2, "true")
3343 buildVtbxl("vtbl", "NVtbl3", 3, "true")
3344 buildVtbxl("vtbl", "NVtbl4", 4, "true")
3345
3346 buildVtbxl("vtbx", "NVtbx1", 1, "false")
3347 buildVtbxl("vtbx", "NVtbx2", 2, "false")
3348 buildVtbxl("vtbx", "NVtbx3", 3, "false")
3349 buildVtbxl("vtbx", "NVtbx4", 4, "false")
3350}};
2677 fpscr.qc = 1;
2678 } else {
2679 destElem = srcElem1;
2680 }
2681 }
2682 Fpscr = fpscr;
2683 '''
2684 twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2685 smallUnsignedTypes, vqrshrunCode)
2686
# Element code for NVqrshruns (mnemonic "vqrshrun", registered for
# smallSignedTypes): rounding, saturating right shift of a signed source.
# rBit is the last bit shifted out; after the manual sign extension of
# mid it is added back.  Any set bit between sizeof(Element) * 8 and the
# top of BigElement saturates the result to 0 (negative source) or
# mask(sizeof(Element) * 8) and sets fpscr.qc.  With imm == 0 a negative
# source gives 0 with fpscr.qc set.
2687 vqrshrunsCode = '''
2688 FPSCR fpscr = (FPSCR)Fpscr;
2689 if (imm > sizeof(srcElem1) * 8) {
2690 if (srcElem1 != 0)
2691 fpscr.qc = 1;
2692 destElem = 0;
2693 } else if (imm) {
2694 BigElement mid = (srcElem1 >> (imm - 1));
2695 uint64_t rBit = mid & 0x1;
2696 mid >>= 1;
2697 mid |= -(mid & ((BigElement)1 <<
2698 (sizeof(BigElement) * 8 - 1 - imm)));
2699 mid += rBit;
2700 if (bits(mid, sizeof(BigElement) * 8 - 1,
2701 sizeof(Element) * 8) != 0) {
2702 if (srcElem1 < 0) {
2703 destElem = 0;
2704 } else {
2705 destElem = mask(sizeof(Element) * 8);
2706 }
2707 fpscr.qc = 1;
2708 } else {
2709 destElem = mid;
2710 }
2711 } else {
2712 if (srcElem1 < 0) {
2713 fpscr.qc = 1;
2714 destElem = 0;
2715 } else {
2716 destElem = srcElem1;
2717 }
2718 }
2719 Fpscr = fpscr;
2720 '''
2721 twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2722 smallSignedTypes, vqrshrunsCode)
2723
# vshll: cast srcElem1 to BigElement and shift it left by imm. A shift count
# of at least the destination element width produces 0 instead of shifting.
2724 vshllCode = '''
2725 if (imm >= sizeof(destElem) * 8) {
2726 destElem = 0;
2727 } else {
2728 destElem = (BigElement)srcElem1 << imm;
2729 }
2730 '''
2731 twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2732
# vmovl: plain element copy, registered through the same long-shift helper
# as vshll (presumably the helper supplies the wider destination element --
# confirm against twoRegLongShiftInst).
2733 vmovlCode = '''
2734 destElem = srcElem1;
2735 '''
2736 twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2737
# vcvt, float -> fixed point, first flavour: calls
# vfpFpSToFixed(srcElem1, false, false, imm). Sets fpscr.idc when
# flushToZero(srcElem1) returns true, and brackets the conversion with
# prepFpState(VfpRoundNearest) / finishVfp.
# NOTE(review): the empty asm statements with "m" constraints look like
# compiler barriers that pin the conversion between the FP state changes --
# confirm. The "ufx" name suggests this is the unsigned flavour.
2738 vcvt2ufxCode = '''
2739 FPSCR fpscr = Fpscr;
2740 if (flushToZero(srcElem1))
2741 fpscr.idc = 1;
2742 VfpSavedState state = prepFpState(VfpRoundNearest);
2743 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2744 destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2745 __asm__ __volatile__("" :: "m" (destReg));
2746 finishVfp(fpscr, state, true);
2747 Fpscr = fpscr;
2748 '''
# D and Q classes; the 2 / 4 argument is presumably the register count.
2749 twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2750 2, vcvt2ufxCode, toInt = True)
2751 twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2752 4, vcvt2ufxCode, toInt = True)
2753
# Same sequence as vcvt2ufxCode, except that the second argument to
# vfpFpSToFixed is true (the "sfx" name suggests the signed flavour).
2754 vcvt2sfxCode = '''
2755 FPSCR fpscr = Fpscr;
2756 if (flushToZero(srcElem1))
2757 fpscr.idc = 1;
2758 VfpSavedState state = prepFpState(VfpRoundNearest);
2759 __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2760 destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2761 __asm__ __volatile__("" :: "m" (destReg));
2762 finishVfp(fpscr, state, true);
2763 Fpscr = fpscr;
2764 '''
2765 twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2766 2, vcvt2sfxCode, toInt = True)
2767 twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2768 4, vcvt2sfxCode, toInt = True)
2769
# vcvt, fixed point -> float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm). Unlike the float -> fixed snippets there is no flushToZero check on
# the (integer) source.
2770 vcvtu2fpCode = '''
2771 FPSCR fpscr = Fpscr;
2772 VfpSavedState state = prepFpState(VfpRoundNearest);
2773 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2774 destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2775 __asm__ __volatile__("" :: "m" (destElem));
2776 finishVfp(fpscr, state, true);
2777 Fpscr = fpscr;
2778 '''
2779 twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2780 2, vcvtu2fpCode, fromInt = True)
2781 twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2782 4, vcvtu2fpCode, fromInt = True)
2783
# Same sequence as vcvtu2fpCode but converting with vfpSFixedToFpS.
2784 vcvts2fpCode = '''
2785 FPSCR fpscr = Fpscr;
2786 VfpSavedState state = prepFpState(VfpRoundNearest);
2787 __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2788 destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2789 __asm__ __volatile__("" :: "m" (destElem));
2790 finishVfp(fpscr, state, true);
2791 Fpscr = fpscr;
2792 '''
2793 twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2794 2, vcvts2fpCode, fromInt = True)
2795 twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2796 4, vcvts2fpCode, fromInt = True)
2797
# vcvt narrowing snippet (NVcvts2h): reinterprets the source element bits as
# a float with bitsToFp, sets fpscr.idc when flushToZero reports true, then
# converts with vcvtFpSFpH, passing fpscr.ahp through to the helper.
# Registered with destination element type uint16_t.
2798 vcvts2hCode = '''
2799 FPSCR fpscr = Fpscr;
2800 float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2801 if (flushToZero(srcFp1))
2802 fpscr.idc = 1;
2803 VfpSavedState state = prepFpState(VfpRoundNearest);
2804 __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2805 : "m" (srcFp1), "m" (destElem));
2806 destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2807 fpscr.ahp, srcFp1);
2808 __asm__ __volatile__("" :: "m" (destElem));
2809 finishVfp(fpscr, state, true);
2810 Fpscr = fpscr;
2811 '''
2812 twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2813
# vcvt widening snippet (NVcvth2s): converts the source element with
# vcvtFpHFpS and stores the result's bit pattern via fpToBits.
2814 vcvth2sCode = '''
2815 FPSCR fpscr = Fpscr;
2816 VfpSavedState state = prepFpState(VfpRoundNearest);
2817 __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2818 : "m" (srcElem1), "m" (destElem));
2819 destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2820 __asm__ __volatile__("" :: "m" (destElem));
2821 finishVfp(fpscr, state, true);
2822 Fpscr = fpscr;
2823 '''
2824 twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2825
# vrsqrte on uint32_t elements: delegates to unsignedRSqrtEstimate.
2826 vrsqrteCode = '''
2827 destElem = unsignedRSqrtEstimate(srcElem1);
2828 '''
2829 twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2830 twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2831
# vrsqrte on float: sets fpscr.idc when flushToZero(srcReg1) reports true,
# then delegates to fprSqrtEstimate, which also receives fpscr.
2832 vrsqrtefpCode = '''
2833 FPSCR fpscr = Fpscr;
2834 if (flushToZero(srcReg1))
2835 fpscr.idc = 1;
2836 destReg = fprSqrtEstimate(fpscr, srcReg1);
2837 Fpscr = fpscr;
2838 '''
2839 twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2840 twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2841
# vrecpe on uint32_t elements: delegates to unsignedRecipEstimate.
2842 vrecpeCode = '''
2843 destElem = unsignedRecipEstimate(srcElem1);
2844 '''
2845 twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2846 twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2847
# vrecpe on float: same shape as vrsqrtefpCode but using fpRecipEstimate.
2848 vrecpefpCode = '''
2849 FPSCR fpscr = Fpscr;
2850 if (flushToZero(srcReg1))
2851 fpscr.idc = 1;
2852 destReg = fpRecipEstimate(fpscr, srcReg1);
2853 Fpscr = fpscr;
2854 '''
2855 twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2856 twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2857
# vrev16/32/64 share one shape: copy the element unchanged and compute j by
# XOR-ing i with (groupSize - 1), which mirrors the index inside a group of
# groupSize elements. groupSize is the group width in bytes (1 << 1, 1 << 2
# or 1 << 3) divided by the element size.
# NOTE(review): i and j are not declared in these snippets; presumably the
# twoRegMiscInst template uses j as the destination element index -- confirm.
2858 vrev16Code = '''
2859 destElem = srcElem1;
2860 unsigned groupSize = ((1 << 1) / sizeof(Element));
2861 unsigned reverseMask = (groupSize - 1);
2862 j = i ^ reverseMask;
2863 '''
2864 twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2865 twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
# 4-byte groups; registered for 8- and 16-bit elements.
2866 vrev32Code = '''
2867 destElem = srcElem1;
2868 unsigned groupSize = ((1 << 2) / sizeof(Element));
2869 unsigned reverseMask = (groupSize - 1);
2870 j = i ^ reverseMask;
2871 '''
2872 twoRegMiscInst("vrev32", "NVrev32D",
2873 ("uint8_t", "uint16_t"), 2, vrev32Code)
2874 twoRegMiscInst("vrev32", "NVrev32Q",
2875 ("uint8_t", "uint16_t"), 4, vrev32Code)
# 8-byte groups; registered for every type in smallUnsignedTypes.
2876 vrev64Code = '''
2877 destElem = srcElem1;
2878 unsigned groupSize = ((1 << 3) / sizeof(Element));
2879 unsigned reverseMask = (groupSize - 1);
2880 j = i ^ reverseMask;
2881 '''
2882 twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2883 twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2884
# vpaddl: sum of two source elements, each cast to BigElement before the
# addition so the sum is formed at the wider width.
2885 vpaddlCode = '''
2886 destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2887 '''
2888 twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2889 twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2890
# vpadal: same widened sum, but accumulated into the existing destElem. The
# extra True argument is presumably what makes the helper read the
# destination first -- confirm against twoRegCondenseInst.
2891 vpadalCode = '''
2892 destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2893 '''
2894 twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2895 twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2896
# vcls: counts how many bits directly below the top bit match the top bit.
# The top bit is shifted out first, then the loop counts while the sign of
# the shifted value stays the same, capped at (element width - 1).
2897 vclsCode = '''
2898 unsigned count = 0;
2899 if (srcElem1 < 0) {
2900 srcElem1 <<= 1;
2901 while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2902 count++;
2903 srcElem1 <<= 1;
2904 }
2905 } else {
2906 srcElem1 <<= 1;
2907 while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2908 count++;
2909 srcElem1 <<= 1;
2910 }
2911 }
2912 destElem = count;
2913 '''
2914 twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2915 twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2916
# vclz: shifts left while the value is non-negative (top bit clear),
# counting the shifts; the count is capped at the element width, which is
# the result for a zero source. Registered with signedTypes so that the
# ">= 0" test inspects the top bit.
2917 vclzCode = '''
2918 unsigned count = 0;
2919 while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2920 count++;
2921 srcElem1 <<= 1;
2922 }
2923 destElem = count;
2924 '''
2925 twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2926 twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2927
# vcnt: population count; adds the low bit and shifts right until the value
# is zero or every bit position has been visited.
2928 vcntCode = '''
2929 unsigned count = 0;
2930 while (srcElem1 && count < sizeof(Element) * 8) {
2931 count += srcElem1 & 0x1;
2932 srcElem1 >>= 1;
2933 }
2934 destElem = count;
2935 '''
2936 twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2937 twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2938
# vmvn (register form): bitwise complement, processed as uint64_t elements.
2939 vmvnCode = '''
2940 destElem = ~srcElem1;
2941 '''
2942 twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2943 twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2944
# vqabs: absolute value. A source with only the top bit set cannot be
# negated, so that case sets fpscr.qc and stores ~srcElem1 (every bit below
# the top bit set) instead.
2945 vqabsCode = '''
2946 FPSCR fpscr = (FPSCR)Fpscr;
2947 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2948 fpscr.qc = 1;
2949 destElem = ~srcElem1;
2950 } else if (srcElem1 < 0) {
2951 destElem = -srcElem1;
2952 } else {
2953 destElem = srcElem1;
2954 }
2955 Fpscr = fpscr;
2956 '''
2957 twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2958 twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2959
# vqneg: negation with the same special case as vqabs for the top-bit-only
# source (sets fpscr.qc, stores ~srcElem1).
2960 vqnegCode = '''
2961 FPSCR fpscr = (FPSCR)Fpscr;
2962 if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2963 fpscr.qc = 1;
2964 destElem = ~srcElem1;
2965 } else {
2966 destElem = -srcElem1;
2967 }
2968 Fpscr = fpscr;
2969 '''
2970 twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2971 twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2972
# vabs (integer): negate negative elements, copy the rest. No saturation
# handling here, unlike vqabsCode.
2973 vabsCode = '''
2974 if (srcElem1 < 0) {
2975 destElem = -srcElem1;
2976 } else {
2977 destElem = srcElem1;
2978 }
2979 '''
2980 twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2981 twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
# vabs (float): stores the value into a float/uint32_t union and ANDs the
# integer view with mask(sizeof(Element) * 8 - 1), i.e. clears the top bit
# of the representation rather than doing an arithmetic comparison.
2982 vabsfpCode = '''
2983 union
2984 {
2985 uint32_t i;
2986 float f;
2987 } cStruct;
2988 cStruct.f = srcReg1;
2989 cStruct.i &= mask(sizeof(Element) * 8 - 1);
2990 destReg = cStruct.f;
2991 '''
2992 twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2993 twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
2994
# vneg (integer): arithmetic negation of each element.
2995 vnegCode = '''
2996 destElem = -srcElem1;
2997 '''
2998 twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2999 twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
# vneg (float): negation of the register value.
3000 vnegfpCode = '''
3001 destReg = -srcReg1;
3002 '''
3003 twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
3004 twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3005
# Compare-against-zero family (vcgt, vcge, vceq, vcle, vclt).
#
# Integer snippets: the element becomes mask(sizeof(Element) * 8) when the
# comparison with 0 holds and 0 otherwise.
#
# Float snippets: binaryOp is run with the matching vc*Func and a 0.0
# operand; destReg becomes -1 when the returned value equals 0 and 0
# otherwise, and a returned 2.0 sets fpscr.ioc.
# NOTE(review): the meaning of the vc*Func return codes (0 apparently means
# "condition holds", 2.0 apparently flags an invalid operation) is defined
# outside this chunk -- confirm before relying on it.
3006 vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3007 twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3008 twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
3009 vcgtfpCode = '''
3010 FPSCR fpscr = (FPSCR)Fpscr;
3011 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3012 true, true, VfpRoundNearest);
3013 destReg = (res == 0) ? -1 : 0;
3014 if (res == 2.0)
3015 fpscr.ioc = 1;
3016 Fpscr = fpscr;
3017 '''
3018 twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3019 2, vcgtfpCode, toInt = True)
3020 twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3021 4, vcgtfpCode, toInt = True)
3022
# vcge: element >= 0.
3023 vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3024 twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3025 twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
3026 vcgefpCode = '''
3027 FPSCR fpscr = (FPSCR)Fpscr;
3028 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3029 true, true, VfpRoundNearest);
3030 destReg = (res == 0) ? -1 : 0;
3031 if (res == 2.0)
3032 fpscr.ioc = 1;
3033 Fpscr = fpscr;
3034 '''
3035 twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3036 2, vcgefpCode, toInt = True)
3037 twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3038 4, vcgefpCode, toInt = True)
3039
# vceq: element == 0.
3040 vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3041 twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3042 twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
3043 vceqfpCode = '''
3044 FPSCR fpscr = (FPSCR)Fpscr;
3045 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3046 true, true, VfpRoundNearest);
3047 destReg = (res == 0) ? -1 : 0;
3048 if (res == 2.0)
3049 fpscr.ioc = 1;
3050 Fpscr = fpscr;
3051 '''
3052 twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3053 2, vceqfpCode, toInt = True)
3054 twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3055 4, vceqfpCode, toInt = True)
3056
# vcle: element <= 0.
3057 vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3058 twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3059 twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
3060 vclefpCode = '''
3061 FPSCR fpscr = (FPSCR)Fpscr;
3062 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3063 true, true, VfpRoundNearest);
3064 destReg = (res == 0) ? -1 : 0;
3065 if (res == 2.0)
3066 fpscr.ioc = 1;
3067 Fpscr = fpscr;
3068 '''
3069 twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3070 2, vclefpCode, toInt = True)
3071 twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3072 4, vclefpCode, toInt = True)
3073
# vclt: element < 0.
3074 vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3075 twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3076 twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
3077 vcltfpCode = '''
3078 FPSCR fpscr = (FPSCR)Fpscr;
3079 float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3080 true, true, VfpRoundNearest);
3081 destReg = (res == 0) ? -1 : 0;
3082 if (res == 2.0)
3083 fpscr.ioc = 1;
3084 Fpscr = fpscr;
3085 '''
3086 twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3087 2, vcltfpCode, toInt = True)
3088 twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3089 4, vcltfpCode, toInt = True)
3090
# The three snippets below modify both srcReg1 and destReg in place.
# NOTE(review): presumably twoRegMiscScramble writes both vectors back to
# their registers -- confirm against the helper.
#
# vswp: exchange the two vectors register by register through a temporary.
3091 vswpCode = '''
3092 FloatRegBits mid;
3093 for (unsigned r = 0; r < rCount; r++) {
3094 mid = srcReg1.regs[r];
3095 srcReg1.regs[r] = destReg.regs[r];
3096 destReg.regs[r] = mid;
3097 }
3098 '''
3099 twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3100 twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3101
# vtrn: for every even index i, swap srcReg1 element i with destReg element
# i + 1.
3102 vtrnCode = '''
3103 Element mid;
3104 for (unsigned i = 0; i < eCount; i += 2) {
3105 mid = srcReg1.elements[i];
3106 srcReg1.elements[i] = destReg.elements[i + 1];
3107 destReg.elements[i + 1] = mid;
3108 }
3109 '''
3110 twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3111 twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3112
# vuzp: de-interleave. After the snippet destReg holds the even-indexed
# elements (destReg's own first, then srcReg1's) and srcReg1 holds the
# odd-indexed ones in the same order. mid is a copy of the original srcReg1,
# needed because srcReg1 is overwritten in the first loop. destReg is
# compacted in place, which is safe because index i is written only after
# index 2 * i has been read.
3113 vuzpCode = '''
3114 Element mid[eCount];
3115 memcpy(&mid, &srcReg1, sizeof(srcReg1));
3116 for (unsigned i = 0; i < eCount / 2; i++) {
3117 srcReg1.elements[i] = destReg.elements[2 * i + 1];
3118 srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3119 destReg.elements[i] = destReg.elements[2 * i];
3120 }
3121 for (unsigned i = 0; i < eCount / 2; i++) {
3122 destReg.elements[eCount / 2 + i] = mid[2 * i];
3123 }
3124 '''
3125 twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3126 twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3127
# vzip: interleave destReg and srcReg1 element by element.
#
# After the snippet destReg holds the interleaved lower halves of the two
# vectors (destReg element first) and srcReg1 holds the interleaved upper
# halves. mid is a copy of the original destReg, needed because destReg is
# overwritten by the first loop. srcReg1 is rewritten in place in the second
# loop; that is safe because slot eCount / 2 + i is read before any write
# can reach it (writes in iteration i touch only indices 2 * i and
# 2 * i + 1, which are below eCount / 2 + i for every i < eCount / 2).
#
# Both loops use an unsigned index, matching every other loop over eCount
# in this file and avoiding a signed/unsigned comparison.
vzipCode = '''
    Element mid[eCount];
    memcpy(&mid, &destReg, sizeof(destReg));
    for (unsigned i = 0; i < eCount / 2; i++) {
        destReg.elements[2 * i] = mid[i];
        destReg.elements[2 * i + 1] = srcReg1.elements[i];
    }
    for (unsigned i = 0; i < eCount / 2; i++) {
        srcReg1.elements[2 * i] = mid[eCount / 2 + i];
        srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
    }
    '''
twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3142
# vmovn: plain element copy registered through the narrowing helper
# (presumably the helper supplies the narrower destination element, so the
# assignment truncates -- confirm against twoRegNarrowMiscInst).
3143 vmovnCode = 'destElem = srcElem1;'
3144 twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3145
# vdup: plain element copy registered through twoRegMiscScInst, which is
# defined outside this chunk.
3146 vdupCode = 'destElem = srcElem1;'
3147 twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3148 twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3149
def vdupGprInst(name, Name, types, rCount):
    # Generate a vdup class whose source is Op1 rather than a vector register.
    #   name   -- mnemonic handed to InstObjParams
    #   Name   -- generated class name
    #   types  -- element types to emit NeonExecDeclare instantiations for
    #   rCount -- number of destination registers written back; also passed
    #             to the templates as "r_count"
    global header_output, exec_output

    # Fill every element of the destination vector with Op1 cast to Element.
    broadcast = '''
    RegVect destReg;
    for (unsigned i = 0; i < eCount; i++) {
        destReg.elements[i] = htog((Element)Op1);
    }
    '''
    # One write-back statement per destination register.
    writeBack = '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        '''
    eWalkCode = broadcast + "".join(
        [writeBack % { "reg" : regIdx } for regIdx in range(rCount)])

    templateArgs = { "code": eWalkCode,
                     "r_count": rCount,
                     "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegOp", templateArgs, [])
    header_output += NeonRegRegOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)

    # Explicit instantiation of the execute method for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
# vdup-from-GPR classes with 2 and 4 destination registers.
3173 vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3174 vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3175
# Immediate forms, all processed as uint64_t elements through oneRegImmInst.
# vmov: destination element is the immediate.
3176 vmovCode = 'destElem = imm;'
3177 oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3178 oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3179
# vorr: OR the immediate into the existing destination. The trailing True is
# presumably what makes the helper read the destination first -- confirm
# against oneRegImmInst.
3180 vorrCode = 'destElem |= imm;'
3181 oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3182 oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3183
# vmvn: complement of the immediate. This rebinds vmvnCode, which held the
# register-form snippet earlier in the file; that is harmless because the
# register-form classes were already generated from the old value.
3184 vmvnCode = 'destElem = ~imm;'
3185 oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3186 oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3187
# vbic: clear the immediate's bits in the existing destination (same
# trailing True as vorr).
3188 vbicCode = 'destElem &= ~imm;'
3189 oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3190 oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3191
# vqmovn (signed types): assign the source to the destination, then cast the
# destination back to BigElement. A mismatch with the source means the value
# did not survive the assignment; in that case qc is set and the result
# becomes mask(sizeof(Element) * 8 - 1), complemented for a negative source.
3192 vqmovnCode = '''
3193 FPSCR fpscr = (FPSCR)Fpscr;
3194 destElem = srcElem1;
3195 if ((BigElement)destElem != srcElem1) {
3196 fpscr.qc = 1;
3197 destElem = mask(sizeof(Element) * 8 - 1);
3198 if (srcElem1 < 0)
3199 destElem = ~destElem;
3200 }
3201 Fpscr = fpscr;
3202 '''
3203 twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3204
# vqmovun, unsigned-source class: same round-trip check; on mismatch qc is
# set and the result is mask(sizeof(Element) * 8).
3205 vqmovunCode = '''
3206 FPSCR fpscr = (FPSCR)Fpscr;
3207 destElem = srcElem1;
3208 if ((BigElement)destElem != srcElem1) {
3209 fpscr.qc = 1;
3210 destElem = mask(sizeof(Element) * 8);
3211 }
3212 Fpscr = fpscr;
3213 '''
3214 twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3215 smallUnsignedTypes, vqmovunCode)
3216
# vqmovun, signed-source class (NVqmovuns): a negative source, or one whose
# low Element-width bits differ from the full value, sets qc. The result is
# then mask(sizeof(Element) * 8), complemented (so all those bits cleared)
# when the source was negative.
3217 vqmovunsCode = '''
3218 FPSCR fpscr = (FPSCR)Fpscr;
3219 destElem = srcElem1;
3220 if (srcElem1 < 0 ||
3221 ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3222 fpscr.qc = 1;
3223 destElem = mask(sizeof(Element) * 8);
3224 if (srcElem1 < 0)
3225 destElem = ~destElem;
3226 }
3227 Fpscr = fpscr;
3228 '''
3229 twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3230 smallSignedTypes, vqmovunsCode)
3231
def buildVext(name, Name, types, rCount, op):
    # Generate a three-register-plus-immediate NEON class around `op`.
    #   name   -- mnemonic handed to InstObjParams
    #   Name   -- generated class name
    #   types  -- element types to emit NeonExecDeclare instantiations for
    #   rCount -- registers per operand; also passed on as "r_count"
    #   op     -- C++ snippet that computes destReg from srcReg1 / srcReg2
    global header_output, exec_output

    loadTmpl = '''
        srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
        srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
        '''
    storeTmpl = '''
        FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
        '''

    # Prologue: declare the vectors and read both sources, one register at
    # a time.
    pieces = ['''
    RegVect srcReg1, srcReg2, destReg;
    ''']
    for regIdx in range(rCount):
        pieces.append(loadTmpl % { "reg" : regIdx })
    # Caller-supplied element shuffle.
    pieces.append(op)
    # Epilogue: write the destination vector back.
    for regIdx in range(rCount):
        pieces.append(storeTmpl % { "reg" : regIdx })
    eWalkCode = "".join(pieces)

    templateArgs = { "code": eWalkCode,
                     "r_count": rCount,
                     "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegRegImmOp", templateArgs, [])
    header_output += NeonRegRegRegImmOpDeclare.subst(iop)
    exec_output += NeonEqualRegExecute.subst(iop)

    # Explicit instantiation of the execute method for each element type.
    for elemType in types:
        exec_output += NeonExecDeclare.subst({ "targs" : elemType,
                                               "class_name" : Name })
3258
# vext: destination element i is element (i + imm) of the concatenation
# srcReg1:srcReg2. Indices below eCount read srcReg1; the rest wrap into
# srcReg2. The assert fires if imm pushes the index past the end of srcReg2.
3259 vextCode = '''
3260 for (unsigned i = 0; i < eCount; i++) {
3261 unsigned index = i + imm;
3262 if (index < eCount) {
3263 destReg.elements[i] = srcReg1.elements[index];
3264 } else {
3265 index -= eCount;
3266 assert(index < eCount);
3267 destReg.elements[i] = srcReg2.elements[index];
3268 }
3269 }
3270 '''
# Byte-granular extract for 2- and 4-register operands.
3271 buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3272 buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3273
def buildVtbxl(name, Name, length, isVtbl):
    # Generate one table-lookup class (vtbl or vtbx flavour).
    #   name   -- mnemonic handed to InstObjParams
    #   Name   -- generated class name
    #   length -- table length; 2 * length table slots are loaded from FpOp1
    #             and indices below 8 * length are treated as in range
    #   isVtbl -- text substituted into the generated C++ as the value of
    #             the isVtbl constant ("true" or "false")
    global header_output, decoder_output, exec_output

    # Declarations plus loads of the index vector and the current
    # destination (needed because out-of-range lanes may keep the old value).
    prologue = '''
    union
    {
        uint8_t bytes[32];
        FloatRegBits regs[8];
    } table;

    union
    {
        uint8_t bytes[8];
        FloatRegBits regs[2];
    } destReg, srcReg2;

    const unsigned length = %(length)d;
    const bool isVtbl = %(isVtbl)s;

    srcReg2.regs[0] = htog(FpOp2P0.uw);
    srcReg2.regs[1] = htog(FpOp2P1.uw);

    destReg.regs[0] = htog(FpDestP0.uw);
    destReg.regs[1] = htog(FpDestP1.uw);
    '''
    code = prologue % { "length" : length, "isVtbl" : isVtbl }

    # Populate all eight table slots: real source registers for the part of
    # the table in use, zero for the remainder.
    loadSlot = 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n'
    zeroSlot = 'table.regs[%(reg)d] = 0;\n'
    slotsInUse = length * 2
    for slot in range(8):
        slotTmpl = loadSlot
        if slot >= slotsInUse:
            slotTmpl = zeroSlot
        code += slotTmpl % { "reg" : slot }

    # Per-byte lookup followed by the write-back of the destination.
    code += '''
    for (unsigned i = 0; i < sizeof(destReg); i++) {
        uint8_t index = srcReg2.bytes[i];
        if (index < 8 * length) {
            destReg.bytes[i] = table.bytes[index];
        } else {
            if (isVtbl)
                destReg.bytes[i] = 0;
            // else destReg.bytes[i] unchanged
        }
    }

    FpDestP0.uw = gtoh(destReg.regs[0]);
    FpDestP1.uw = gtoh(destReg.regs[1]);
    '''

    templateArgs = { "code": code,
                     "predicate_test": predicateTest }
    iop = InstObjParams(name, Name, "RegRegRegOp", templateArgs, [])
    header_output += RegRegRegOpDeclare.subst(iop)
    decoder_output += RegRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
3326
# vtbl classes for table lengths 1-4: isVtbl is "true", so out-of-range
# index bytes produce 0 in the destination.
3327 buildVtbxl("vtbl", "NVtbl1", 1, "true")
3328 buildVtbxl("vtbl", "NVtbl2", 2, "true")
3329 buildVtbxl("vtbl", "NVtbl3", 3, "true")
3330 buildVtbxl("vtbl", "NVtbl4", 4, "true")
3331
# vtbx classes for table lengths 1-4: isVtbl is "false", so out-of-range
# index bytes leave the destination byte unchanged.
3332 buildVtbxl("vtbx", "NVtbx1", 1, "false")
3333 buildVtbxl("vtbx", "NVtbx2", 2, "false")
3334 buildVtbxl("vtbx", "NVtbx3", 3, "false")
3335 buildVtbxl("vtbx", "NVtbx4", 4, "false")
3336}};