neon.isa revision 7640
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
let {{
    # C++ snippet placed at the start of the generated code of every
    # instruction built by the generator functions in this file: it
    # returns disabledFault() when neonEnabled(Cpacr, Cpsr, Fpexc) is
    # false.
    simdEnabledCheckCode = '''
        if (!neonEnabled(Cpacr, Cpsr, Fpexc))
            return disabledFault();
    '''
}};
628
629let {{
630
    # Accumulators for generated text; the generator functions below
    # append class declarations to header_output and execute code to
    # exec_output.
    header_output = ""
    exec_output = ""

    # Names of the C++ element types, grouped by signedness and by
    # whether the 64 bit type is included ("small" groups stop at 32 bit).
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes
640
641    def threeEqualRegInst(name, Name, types, rCount, op,
642                          readDest=False, pairwise=False):
643        global header_output, exec_output
644        eWalkCode = simdEnabledCheckCode + '''
645        RegVect srcReg1, srcReg2, destReg;
646        '''
647        for reg in range(rCount):
648            eWalkCode += '''
649                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
650                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
651            ''' % { "reg" : reg }
652            if readDest:
653                eWalkCode += '''
654                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
655                ''' % { "reg" : reg }
656        readDestCode = ''
657        if readDest:
658            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
659        if pairwise:
660            eWalkCode += '''
661            for (unsigned i = 0; i < eCount; i++) {
662                Element srcElem1 = gtoh(2 * i < eCount ?
663                                        srcReg1.elements[2 * i] :
664                                        srcReg2.elements[2 * i - eCount]);
665                Element srcElem2 = gtoh(2 * i < eCount ?
666                                        srcReg1.elements[2 * i + 1] :
667                                        srcReg2.elements[2 * i + 1 - eCount]);
668                Element destElem;
669                %(readDest)s
670                %(op)s
671                destReg.elements[i] = htog(destElem);
672            }
673            ''' % { "op" : op, "readDest" : readDestCode }
674        else:
675            eWalkCode += '''
676            for (unsigned i = 0; i < eCount; i++) {
677                Element srcElem1 = gtoh(srcReg1.elements[i]);
678                Element srcElem2 = gtoh(srcReg2.elements[i]);
679                Element destElem;
680                %(readDest)s
681                %(op)s
682                destReg.elements[i] = htog(destElem);
683            }
684            ''' % { "op" : op, "readDest" : readDestCode }
685        for reg in range(rCount):
686            eWalkCode += '''
687            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
688            ''' % { "reg" : reg }
689        iop = InstObjParams(name, Name,
690                            "RegRegRegOp",
691                            { "code": eWalkCode,
692                              "r_count": rCount,
693                              "predicate_test": predicateTest }, [])
694        header_output += NeonRegRegRegOpDeclare.subst(iop)
695        exec_output += NeonEqualRegExecute.subst(iop)
696        for type in types:
697            substDict = { "targs" : type,
698                          "class_name" : Name }
699            exec_output += NeonExecDeclare.subst(substDict)
700
701    def threeEqualRegInstFp(name, Name, types, rCount, op,
702                            readDest=False, pairwise=False, toInt=False):
703        global header_output, exec_output
704        eWalkCode = simdEnabledCheckCode + '''
705        typedef FloatReg FloatVect[rCount];
706        FloatVect srcRegs1, srcRegs2;
707        '''
708        if toInt:
709            eWalkCode += 'RegVect destRegs;\n'
710        else:
711            eWalkCode += 'FloatVect destRegs;\n'
712        for reg in range(rCount):
713            eWalkCode += '''
714                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
715                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
716            ''' % { "reg" : reg }
717            if readDest:
718                if toInt:
719                    eWalkCode += '''
720                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
721                    ''' % { "reg" : reg }
722                else:
723                    eWalkCode += '''
724                        destRegs[%(reg)d] = FpDestP%(reg)d;
725                    ''' % { "reg" : reg }
726        readDestCode = ''
727        if readDest:
728            readDestCode = 'destReg = destRegs[r];'
729        destType = 'FloatReg'
730        writeDest = 'destRegs[r] = destReg;'
731        if toInt:
732            destType = 'FloatRegBits'
733            writeDest = 'destRegs.regs[r] = destReg;'
734        if pairwise:
735            eWalkCode += '''
736            for (unsigned r = 0; r < rCount; r++) {
737                FloatReg srcReg1 = (2 * r < rCount) ?
738                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
739                FloatReg srcReg2 = (2 * r < rCount) ?
740                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
741                %(destType)s destReg;
742                %(readDest)s
743                %(op)s
744                %(writeDest)s
745            }
746            ''' % { "op" : op,
747                    "readDest" : readDestCode,
748                    "destType" : destType,
749                    "writeDest" : writeDest }
750        else:
751            eWalkCode += '''
752            for (unsigned r = 0; r < rCount; r++) {
753                FloatReg srcReg1 = srcRegs1[r];
754                FloatReg srcReg2 = srcRegs2[r];
755                %(destType)s destReg;
756                %(readDest)s
757                %(op)s
758                %(writeDest)s
759            }
760            ''' % { "op" : op,
761                    "readDest" : readDestCode,
762                    "destType" : destType,
763                    "writeDest" : writeDest }
764        for reg in range(rCount):
765            if toInt:
766                eWalkCode += '''
767                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
768                ''' % { "reg" : reg }
769            else:
770                eWalkCode += '''
771                FpDestP%(reg)d = destRegs[%(reg)d];
772                ''' % { "reg" : reg }
773        iop = InstObjParams(name, Name,
774                            "FpRegRegRegOp",
775                            { "code": eWalkCode,
776                              "r_count": rCount,
777                              "predicate_test": predicateTest }, [])
778        header_output += NeonRegRegRegOpDeclare.subst(iop)
779        exec_output += NeonEqualRegExecute.subst(iop)
780        for type in types:
781            substDict = { "targs" : type,
782                          "class_name" : Name }
783            exec_output += NeonExecDeclare.subst(substDict)
784
785    def threeUnequalRegInst(name, Name, types, op,
786                            bigSrc1, bigSrc2, bigDest, readDest):
787        global header_output, exec_output
788        src1Cnt = src2Cnt = destCnt = 2
789        src1Prefix = src2Prefix = destPrefix = ''
790        if bigSrc1:
791            src1Cnt = 4
792            src1Prefix = 'Big'
793        if bigSrc2:
794            src2Cnt = 4
795            src2Prefix = 'Big'
796        if bigDest:
797            destCnt = 4
798            destPrefix = 'Big'
799        eWalkCode = simdEnabledCheckCode + '''
800            %sRegVect srcReg1;
801            %sRegVect srcReg2;
802            %sRegVect destReg;
803        ''' % (src1Prefix, src2Prefix, destPrefix)
804        for reg in range(src1Cnt):
805            eWalkCode += '''
806                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
807            ''' % { "reg" : reg }
808        for reg in range(src2Cnt):
809            eWalkCode += '''
810                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
811            ''' % { "reg" : reg }
812        if readDest:
813            for reg in range(destCnt):
814                eWalkCode += '''
815                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
816                ''' % { "reg" : reg }
817        readDestCode = ''
818        if readDest:
819            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
820        eWalkCode += '''
821        for (unsigned i = 0; i < eCount; i++) {
822            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
823            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
824            %(destPrefix)sElement destElem;
825            %(readDest)s
826            %(op)s
827            destReg.elements[i] = htog(destElem);
828        }
829        ''' % { "op" : op, "readDest" : readDestCode,
830                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
831                "destPrefix" : destPrefix }
832        for reg in range(destCnt):
833            eWalkCode += '''
834            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
835            ''' % { "reg" : reg }
836        iop = InstObjParams(name, Name,
837                            "RegRegRegOp",
838                            { "code": eWalkCode,
839                              "r_count": 2,
840                              "predicate_test": predicateTest }, [])
841        header_output += NeonRegRegRegOpDeclare.subst(iop)
842        exec_output += NeonUnequalRegExecute.subst(iop)
843        for type in types:
844            substDict = { "targs" : type,
845                          "class_name" : Name }
846            exec_output += NeonExecDeclare.subst(substDict)
847
848    def threeRegNarrowInst(name, Name, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, types, op,
850                            True, True, False, readDest)
851
852    def threeRegLongInst(name, Name, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, types, op,
854                            False, False, True, readDest)
855
856    def threeRegWideInst(name, Name, types, op, readDest=False):
857        threeUnequalRegInst(name, Name, types, op,
858                            True, False, True, readDest)
859
860    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
861        global header_output, exec_output
862        eWalkCode = simdEnabledCheckCode + '''
863        RegVect srcReg1, srcReg2, destReg;
864        '''
865        for reg in range(rCount):
866            eWalkCode += '''
867                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
868                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
869            ''' % { "reg" : reg }
870            if readDest:
871                eWalkCode += '''
872                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
873                ''' % { "reg" : reg }
874        readDestCode = ''
875        if readDest:
876            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
877        eWalkCode += '''
878        assert(imm >= 0 && imm < eCount);
879        for (unsigned i = 0; i < eCount; i++) {
880            Element srcElem1 = gtoh(srcReg1.elements[i]);
881            Element srcElem2 = gtoh(srcReg2.elements[imm]);
882            Element destElem;
883            %(readDest)s
884            %(op)s
885            destReg.elements[i] = htog(destElem);
886        }
887        ''' % { "op" : op, "readDest" : readDestCode }
888        for reg in range(rCount):
889            eWalkCode += '''
890            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
891            ''' % { "reg" : reg }
892        iop = InstObjParams(name, Name,
893                            "RegRegRegImmOp",
894                            { "code": eWalkCode,
895                              "r_count": rCount,
896                              "predicate_test": predicateTest }, [])
897        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
898        exec_output += NeonEqualRegExecute.subst(iop)
899        for type in types:
900            substDict = { "targs" : type,
901                          "class_name" : Name }
902            exec_output += NeonExecDeclare.subst(substDict)
903
904    def twoRegLongInst(name, Name, types, op, readDest=False):
905        global header_output, exec_output
906        rCount = 2
907        eWalkCode = simdEnabledCheckCode + '''
908        RegVect srcReg1, srcReg2;
909        BigRegVect destReg;
910        '''
911        for reg in range(rCount):
912            eWalkCode += '''
913                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
914                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
915            ''' % { "reg" : reg }
916        if readDest:
917            for reg in range(2 * rCount):
918                eWalkCode += '''
919                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
920                ''' % { "reg" : reg }
921        readDestCode = ''
922        if readDest:
923            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
924        eWalkCode += '''
925        assert(imm >= 0 && imm < eCount);
926        for (unsigned i = 0; i < eCount; i++) {
927            Element srcElem1 = gtoh(srcReg1.elements[i]);
928            Element srcElem2 = gtoh(srcReg2.elements[imm]);
929            BigElement destElem;
930            %(readDest)s
931            %(op)s
932            destReg.elements[i] = htog(destElem);
933        }
934        ''' % { "op" : op, "readDest" : readDestCode }
935        for reg in range(2 * rCount):
936            eWalkCode += '''
937            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
938            ''' % { "reg" : reg }
939        iop = InstObjParams(name, Name,
940                            "RegRegRegImmOp",
941                            { "code": eWalkCode,
942                              "r_count": rCount,
943                              "predicate_test": predicateTest }, [])
944        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
945        exec_output += NeonUnequalRegExecute.subst(iop)
946        for type in types:
947            substDict = { "targs" : type,
948                          "class_name" : Name }
949            exec_output += NeonExecDeclare.subst(substDict)
950
951    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
952        global header_output, exec_output
953        eWalkCode = simdEnabledCheckCode + '''
954        typedef FloatReg FloatVect[rCount];
955        FloatVect srcRegs1, srcRegs2, destRegs;
956        '''
957        for reg in range(rCount):
958            eWalkCode += '''
959                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
960                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
961            ''' % { "reg" : reg }
962            if readDest:
963                eWalkCode += '''
964                    destRegs[%(reg)d] = FpDestP%(reg)d;
965                ''' % { "reg" : reg }
966        readDestCode = ''
967        if readDest:
968            readDestCode = 'destReg = destRegs[i];'
969        eWalkCode += '''
970        assert(imm >= 0 && imm < rCount);
971        for (unsigned i = 0; i < rCount; i++) {
972            FloatReg srcReg1 = srcRegs1[i];
973            FloatReg srcReg2 = srcRegs2[imm];
974            FloatReg destReg;
975            %(readDest)s
976            %(op)s
977            destRegs[i] = destReg;
978        }
979        ''' % { "op" : op, "readDest" : readDestCode }
980        for reg in range(rCount):
981            eWalkCode += '''
982            FpDestP%(reg)d = destRegs[%(reg)d];
983            ''' % { "reg" : reg }
984        iop = InstObjParams(name, Name,
985                            "FpRegRegRegImmOp",
986                            { "code": eWalkCode,
987                              "r_count": rCount,
988                              "predicate_test": predicateTest }, [])
989        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
990        exec_output += NeonEqualRegExecute.subst(iop)
991        for type in types:
992            substDict = { "targs" : type,
993                          "class_name" : Name }
994            exec_output += NeonExecDeclare.subst(substDict)
995
996    def twoRegShiftInst(name, Name, types, rCount, op,
997            readDest=False, toInt=False, fromInt=False):
998        global header_output, exec_output
999        eWalkCode = simdEnabledCheckCode + '''
1000        RegVect srcRegs1, destRegs;
1001        '''
1002        for reg in range(rCount):
1003            eWalkCode += '''
1004                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1005            ''' % { "reg" : reg }
1006            if readDest:
1007                eWalkCode += '''
1008                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1009                ''' % { "reg" : reg }
1010        readDestCode = ''
1011        if readDest:
1012            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1013            if toInt:
1014                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1015        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1016        if fromInt:
1017            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1018        declDest = 'Element destElem;'
1019        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1020        if toInt:
1021            declDest = 'FloatRegBits destReg;'
1022            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1023        eWalkCode += '''
1024        for (unsigned i = 0; i < eCount; i++) {
1025            %(readOp)s
1026            %(declDest)s
1027            %(readDest)s
1028            %(op)s
1029            %(writeDest)s
1030        }
1031        ''' % { "readOp" : readOpCode,
1032                "declDest" : declDest,
1033                "readDest" : readDestCode,
1034                "op" : op,
1035                "writeDest" : writeDestCode }
1036        for reg in range(rCount):
1037            eWalkCode += '''
1038            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1039            ''' % { "reg" : reg }
1040        iop = InstObjParams(name, Name,
1041                            "RegRegImmOp",
1042                            { "code": eWalkCode,
1043                              "r_count": rCount,
1044                              "predicate_test": predicateTest }, [])
1045        header_output += NeonRegRegImmOpDeclare.subst(iop)
1046        exec_output += NeonEqualRegExecute.subst(iop)
1047        for type in types:
1048            substDict = { "targs" : type,
1049                          "class_name" : Name }
1050            exec_output += NeonExecDeclare.subst(substDict)
1051
1052    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1053        global header_output, exec_output
1054        eWalkCode = simdEnabledCheckCode + '''
1055        BigRegVect srcReg1;
1056        RegVect destReg;
1057        '''
1058        for reg in range(4):
1059            eWalkCode += '''
1060                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1061            ''' % { "reg" : reg }
1062        if readDest:
1063            for reg in range(2):
1064                eWalkCode += '''
1065                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1066                ''' % { "reg" : reg }
1067        readDestCode = ''
1068        if readDest:
1069            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1070        eWalkCode += '''
1071        for (unsigned i = 0; i < eCount; i++) {
1072            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1073            Element destElem;
1074            %(readDest)s
1075            %(op)s
1076            destReg.elements[i] = htog(destElem);
1077        }
1078        ''' % { "op" : op, "readDest" : readDestCode }
1079        for reg in range(2):
1080            eWalkCode += '''
1081            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1082            ''' % { "reg" : reg }
1083        iop = InstObjParams(name, Name,
1084                            "RegRegImmOp",
1085                            { "code": eWalkCode,
1086                              "r_count": 2,
1087                              "predicate_test": predicateTest }, [])
1088        header_output += NeonRegRegImmOpDeclare.subst(iop)
1089        exec_output += NeonUnequalRegExecute.subst(iop)
1090        for type in types:
1091            substDict = { "targs" : type,
1092                          "class_name" : Name }
1093            exec_output += NeonExecDeclare.subst(substDict)
1094
1095    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1096        global header_output, exec_output
1097        eWalkCode = simdEnabledCheckCode + '''
1098        RegVect srcReg1;
1099        BigRegVect destReg;
1100        '''
1101        for reg in range(2):
1102            eWalkCode += '''
1103                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1104            ''' % { "reg" : reg }
1105        if readDest:
1106            for reg in range(4):
1107                eWalkCode += '''
1108                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1109                ''' % { "reg" : reg }
1110        readDestCode = ''
1111        if readDest:
1112            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1113        eWalkCode += '''
1114        for (unsigned i = 0; i < eCount; i++) {
1115            Element srcElem1 = gtoh(srcReg1.elements[i]);
1116            BigElement destElem;
1117            %(readDest)s
1118            %(op)s
1119            destReg.elements[i] = htog(destElem);
1120        }
1121        ''' % { "op" : op, "readDest" : readDestCode }
1122        for reg in range(4):
1123            eWalkCode += '''
1124            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1125            ''' % { "reg" : reg }
1126        iop = InstObjParams(name, Name,
1127                            "RegRegImmOp",
1128                            { "code": eWalkCode,
1129                              "r_count": 2,
1130                              "predicate_test": predicateTest }, [])
1131        header_output += NeonRegRegImmOpDeclare.subst(iop)
1132        exec_output += NeonUnequalRegExecute.subst(iop)
1133        for type in types:
1134            substDict = { "targs" : type,
1135                          "class_name" : Name }
1136            exec_output += NeonExecDeclare.subst(substDict)
1137
1138    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1139        global header_output, exec_output
1140        eWalkCode = simdEnabledCheckCode + '''
1141        RegVect srcReg1, destReg;
1142        '''
1143        for reg in range(rCount):
1144            eWalkCode += '''
1145                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1146            ''' % { "reg" : reg }
1147            if readDest:
1148                eWalkCode += '''
1149                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1150                ''' % { "reg" : reg }
1151        readDestCode = ''
1152        if readDest:
1153            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1154        eWalkCode += '''
1155        for (unsigned i = 0; i < eCount; i++) {
1156            unsigned j = i;
1157            Element srcElem1 = gtoh(srcReg1.elements[i]);
1158            Element destElem;
1159            %(readDest)s
1160            %(op)s
1161            destReg.elements[j] = htog(destElem);
1162        }
1163        ''' % { "op" : op, "readDest" : readDestCode }
1164        for reg in range(rCount):
1165            eWalkCode += '''
1166            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1167            ''' % { "reg" : reg }
1168        iop = InstObjParams(name, Name,
1169                            "RegRegOp",
1170                            { "code": eWalkCode,
1171                              "r_count": rCount,
1172                              "predicate_test": predicateTest }, [])
1173        header_output += NeonRegRegOpDeclare.subst(iop)
1174        exec_output += NeonEqualRegExecute.subst(iop)
1175        for type in types:
1176            substDict = { "targs" : type,
1177                          "class_name" : Name }
1178            exec_output += NeonExecDeclare.subst(substDict)
1179
1180    def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1181        global header_output, exec_output
1182        eWalkCode = simdEnabledCheckCode + '''
1183        RegVect srcReg1, destReg;
1184        '''
1185        for reg in range(rCount):
1186            eWalkCode += '''
1187                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1188            ''' % { "reg" : reg }
1189            if readDest:
1190                eWalkCode += '''
1191                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1192                ''' % { "reg" : reg }
1193        readDestCode = ''
1194        if readDest:
1195            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1196        eWalkCode += '''
1197        for (unsigned i = 0; i < eCount; i++) {
1198            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1199            Element destElem;
1200            %(readDest)s
1201            %(op)s
1202            destReg.elements[i] = htog(destElem);
1203        }
1204        ''' % { "op" : op, "readDest" : readDestCode }
1205        for reg in range(rCount):
1206            eWalkCode += '''
1207            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1208            ''' % { "reg" : reg }
1209        iop = InstObjParams(name, Name,
1210                            "RegRegImmOp",
1211                            { "code": eWalkCode,
1212                              "r_count": rCount,
1213                              "predicate_test": predicateTest }, [])
1214        header_output += NeonRegRegImmOpDeclare.subst(iop)
1215        exec_output += NeonEqualRegExecute.subst(iop)
1216        for type in types:
1217            substDict = { "targs" : type,
1218                          "class_name" : Name }
1219            exec_output += NeonExecDeclare.subst(substDict)
1220
1221    def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1222        global header_output, exec_output
1223        eWalkCode = simdEnabledCheckCode + '''
1224        RegVect srcReg1, destReg;
1225        '''
1226        for reg in range(rCount):
1227            eWalkCode += '''
1228                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1229                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1230            ''' % { "reg" : reg }
1231            if readDest:
1232                eWalkCode += '''
1233                ''' % { "reg" : reg }
1234        readDestCode = ''
1235        if readDest:
1236            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1237        eWalkCode += op
1238        for reg in range(rCount):
1239            eWalkCode += '''
1240            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1241            FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1242            ''' % { "reg" : reg }
1243        iop = InstObjParams(name, Name,
1244                            "RegRegOp",
1245                            { "code": eWalkCode,
1246                              "r_count": rCount,
1247                              "predicate_test": predicateTest }, [])
1248        header_output += NeonRegRegOpDeclare.subst(iop)
1249        exec_output += NeonEqualRegExecute.subst(iop)
1250        for type in types:
1251            substDict = { "targs" : type,
1252                          "class_name" : Name }
1253            exec_output += NeonExecDeclare.subst(substDict)
1254
1255    def twoRegMiscInstFp(name, Name, types, rCount, op,
1256            readDest=False, toInt=False):
1257        global header_output, exec_output
1258        eWalkCode = simdEnabledCheckCode + '''
1259        typedef FloatReg FloatVect[rCount];
1260        FloatVect srcRegs1;
1261        '''
1262        if toInt:
1263            eWalkCode += 'RegVect destRegs;\n'
1264        else:
1265            eWalkCode += 'FloatVect destRegs;\n'
1266        for reg in range(rCount):
1267            eWalkCode += '''
1268                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1269            ''' % { "reg" : reg }
1270            if readDest:
1271                if toInt:
1272                    eWalkCode += '''
1273                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1274                    ''' % { "reg" : reg }
1275                else:
1276                    eWalkCode += '''
1277                        destRegs[%(reg)d] = FpDestP%(reg)d;
1278                    ''' % { "reg" : reg }
1279        readDestCode = ''
1280        if readDest:
1281            readDestCode = 'destReg = destRegs[i];'
1282        destType = 'FloatReg'
1283        writeDest = 'destRegs[r] = destReg;'
1284        if toInt:
1285            destType = 'FloatRegBits'
1286            writeDest = 'destRegs.regs[r] = destReg;'
1287        eWalkCode += '''
1288        for (unsigned r = 0; r < rCount; r++) {
1289            FloatReg srcReg1 = srcRegs1[r];
1290            %(destType)s destReg;
1291            %(readDest)s
1292            %(op)s
1293            %(writeDest)s
1294        }
1295        ''' % { "op" : op,
1296                "readDest" : readDestCode,
1297                "destType" : destType,
1298                "writeDest" : writeDest }
1299        for reg in range(rCount):
1300            if toInt:
1301                eWalkCode += '''
1302                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1303                ''' % { "reg" : reg }
1304            else:
1305                eWalkCode += '''
1306                FpDestP%(reg)d = destRegs[%(reg)d];
1307                ''' % { "reg" : reg }
1308        iop = InstObjParams(name, Name,
1309                            "FpRegRegOp",
1310                            { "code": eWalkCode,
1311                              "r_count": rCount,
1312                              "predicate_test": predicateTest }, [])
1313        header_output += NeonRegRegOpDeclare.subst(iop)
1314        exec_output += NeonEqualRegExecute.subst(iop)
1315        for type in types:
1316            substDict = { "targs" : type,
1317                          "class_name" : Name }
1318            exec_output += NeonExecDeclare.subst(substDict)
1319
1320    def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1321        global header_output, exec_output
1322        eWalkCode = simdEnabledCheckCode + '''
1323        RegVect srcRegs;
1324        BigRegVect destReg;
1325        '''
1326        for reg in range(rCount):
1327            eWalkCode += '''
1328                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1329            ''' % { "reg" : reg }
1330            if readDest:
1331                eWalkCode += '''
1332                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1333                ''' % { "reg" : reg }
1334        readDestCode = ''
1335        if readDest:
1336            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1337        eWalkCode += '''
1338        for (unsigned i = 0; i < eCount / 2; i++) {
1339            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1340            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1341            BigElement destElem;
1342            %(readDest)s
1343            %(op)s
1344            destReg.elements[i] = htog(destElem);
1345        }
1346        ''' % { "op" : op, "readDest" : readDestCode }
1347        for reg in range(rCount):
1348            eWalkCode += '''
1349            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1350            ''' % { "reg" : reg }
1351        iop = InstObjParams(name, Name,
1352                            "RegRegOp",
1353                            { "code": eWalkCode,
1354                              "r_count": rCount,
1355                              "predicate_test": predicateTest }, [])
1356        header_output += NeonRegRegOpDeclare.subst(iop)
1357        exec_output += NeonUnequalRegExecute.subst(iop)
1358        for type in types:
1359            substDict = { "targs" : type,
1360                          "class_name" : Name }
1361            exec_output += NeonExecDeclare.subst(substDict)
1362
1363    def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1364        global header_output, exec_output
1365        eWalkCode = simdEnabledCheckCode + '''
1366        BigRegVect srcReg1;
1367        RegVect destReg;
1368        '''
1369        for reg in range(4):
1370            eWalkCode += '''
1371                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1372            ''' % { "reg" : reg }
1373        if readDest:
1374            for reg in range(2):
1375                eWalkCode += '''
1376                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1377                ''' % { "reg" : reg }
1378        readDestCode = ''
1379        if readDest:
1380            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1381        eWalkCode += '''
1382        for (unsigned i = 0; i < eCount; i++) {
1383            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1384            Element destElem;
1385            %(readDest)s
1386            %(op)s
1387            destReg.elements[i] = htog(destElem);
1388        }
1389        ''' % { "op" : op, "readDest" : readDestCode }
1390        for reg in range(2):
1391            eWalkCode += '''
1392            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1393            ''' % { "reg" : reg }
1394        iop = InstObjParams(name, Name,
1395                            "RegRegOp",
1396                            { "code": eWalkCode,
1397                              "r_count": 2,
1398                              "predicate_test": predicateTest }, [])
1399        header_output += NeonRegRegOpDeclare.subst(iop)
1400        exec_output += NeonUnequalRegExecute.subst(iop)
1401        for type in types:
1402            substDict = { "targs" : type,
1403                          "class_name" : Name }
1404            exec_output += NeonExecDeclare.subst(substDict)
1405
1406    def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1407        global header_output, exec_output
1408        eWalkCode = simdEnabledCheckCode + '''
1409        RegVect destReg;
1410        '''
1411        if readDest:
1412            for reg in range(rCount):
1413                eWalkCode += '''
1414                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1415                ''' % { "reg" : reg }
1416        readDestCode = ''
1417        if readDest:
1418            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1419        eWalkCode += '''
1420        for (unsigned i = 0; i < eCount; i++) {
1421            Element destElem;
1422            %(readDest)s
1423            %(op)s
1424            destReg.elements[i] = htog(destElem);
1425        }
1426        ''' % { "op" : op, "readDest" : readDestCode }
1427        for reg in range(rCount):
1428            eWalkCode += '''
1429            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1430            ''' % { "reg" : reg }
1431        iop = InstObjParams(name, Name,
1432                            "RegImmOp",
1433                            { "code": eWalkCode,
1434                              "r_count": rCount,
1435                              "predicate_test": predicateTest }, [])
1436        header_output += NeonRegImmOpDeclare.subst(iop)
1437        exec_output += NeonEqualRegExecute.subst(iop)
1438        for type in types:
1439            substDict = { "targs" : type,
1440                          "class_name" : Name }
1441            exec_output += NeonExecDeclare.subst(substDict)
1442
1443    def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1444        global header_output, exec_output
1445        eWalkCode = simdEnabledCheckCode + '''
1446        RegVect srcReg1;
1447        BigRegVect destReg;
1448        '''
1449        for reg in range(2):
1450            eWalkCode += '''
1451                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1452            ''' % { "reg" : reg }
1453        if readDest:
1454            for reg in range(4):
1455                eWalkCode += '''
1456                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1457                ''' % { "reg" : reg }
1458        readDestCode = ''
1459        if readDest:
1460            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1461        eWalkCode += '''
1462        for (unsigned i = 0; i < eCount; i++) {
1463            Element srcElem1 = gtoh(srcReg1.elements[i]);
1464            BigElement destElem;
1465            %(readDest)s
1466            %(op)s
1467            destReg.elements[i] = htog(destElem);
1468        }
1469        ''' % { "op" : op, "readDest" : readDestCode }
1470        for reg in range(4):
1471            eWalkCode += '''
1472            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1473            ''' % { "reg" : reg }
1474        iop = InstObjParams(name, Name,
1475                            "RegRegOp",
1476                            { "code": eWalkCode,
1477                              "r_count": 2,
1478                              "predicate_test": predicateTest }, [])
1479        header_output += NeonRegRegOpDeclare.subst(iop)
1480        exec_output += NeonUnequalRegExecute.subst(iop)
1481        for type in types:
1482            substDict = { "targs" : type,
1483                          "class_name" : Name }
1484            exec_output += NeonExecDeclare.subst(substDict)
1485
1486    vhaddCode = '''
1487        Element carryBit =
1488            (((unsigned)srcElem1 & 0x1) +
1489             ((unsigned)srcElem2 & 0x1)) >> 1;
1490        // Use division instead of a shift to ensure the sign extension works
1491        // right. The compiler will figure out if it can be a shift. Mask the
1492        // inputs so they get truncated correctly.
1493        destElem = (((srcElem1 & ~(Element)1) / 2) +
1494                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1495    '''
1496    threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
1497    threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1498
1499    vrhaddCode = '''
1500        Element carryBit =
1501            (((unsigned)srcElem1 & 0x1) +
1502             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1503        // Use division instead of a shift to ensure the sign extension works
1504        // right. The compiler will figure out if it can be a shift. Mask the
1505        // inputs so they get truncated correctly.
1506        destElem = (((srcElem1 & ~(Element)1) / 2) +
1507                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1508    '''
1509    threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
1510    threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1511
1512    vhsubCode = '''
1513        Element barrowBit =
1514            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1515        // Use division instead of a shift to ensure the sign extension works
1516        // right. The compiler will figure out if it can be a shift. Mask the
1517        // inputs so they get truncated correctly.
1518        destElem = (((srcElem1 & ~(Element)1) / 2) -
1519                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1520    '''
1521    threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
1522    threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1523
1524    vandCode = '''
1525        destElem = srcElem1 & srcElem2;
1526    '''
1527    threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
1528    threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
1529
1530    vbicCode = '''
1531        destElem = srcElem1 & ~srcElem2;
1532    '''
1533    threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
1534    threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
1535
1536    vorrCode = '''
1537        destElem = srcElem1 | srcElem2;
1538    '''
1539    threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
1540    threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
1541
1542    threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
1543    threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
1544
1545    vornCode = '''
1546        destElem = srcElem1 | ~srcElem2;
1547    '''
1548    threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
1549    threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
1550
1551    veorCode = '''
1552        destElem = srcElem1 ^ srcElem2;
1553    '''
1554    threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
1555    threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1556
1557    vbifCode = '''
1558        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1559    '''
        # Bitwise insert if false: where a srcElem2 bit is 0 the bit comes from
        # srcElem1, where it is 1 the existing destElem bit is kept.
        # The snippet reads destElem; the trailing True presumably tells the
        # helper to load the destination first -- confirm in threeEqualRegInst.
1560    threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
1561    threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
1562    vbitCode = '''
1563        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1564    '''
        # Bitwise insert if true: where a srcElem2 bit is 1 the bit comes from
        # srcElem1, where it is 0 the existing destElem bit is kept.
        # Reads destElem, hence (presumably) the trailing True argument.
1565    threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
1566    threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
1567    vbslCode = '''
1568        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1569    '''
        # Bitwise select: the incoming destElem is the mask. Bits set in it
        # pick srcElem1, cleared bits pick srcElem2.
        # Reads destElem, hence (presumably) the trailing True argument.
1570    threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
1571    threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1572
1573    vmaxCode = '''
1574        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1575    '''
        # Element-wise maximum; srcElem2 is chosen when the two are equal.
        # vmaxCode is reused further down for the pairwise "vpmax" forms.
1576    threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1577    threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1578
1579    vminCode = '''
1580        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1581    '''
        # Element-wise minimum; srcElem2 is chosen when the two are equal.
        # vminCode is reused further down for the pairwise "vpmin" forms.
1582    threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1583    threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1584
1585    vaddCode = '''
1586        destElem = srcElem1 + srcElem2;
1587    '''
        # Plain element-wise add at the element's own width.
1588    threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1589    threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1590
        # The pairwise add reuses vaddCode; pairwise=True is what distinguishes
        # it. How operands are paired is decided inside threeEqualRegInst,
        # which is defined outside this chunk.
1591    threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1592                      2, vaddCode, pairwise=True)
1593    threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1594                      4, vaddCode, pairwise=True)
1595    vaddlwCode = '''
1596        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1597    '''
        # Add with both operands first converted to BigElement. One snippet
        # serves both the long form (vaddl) and the wide form (vaddw).
1598    threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1599    threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
1600    vaddhnCode = '''
1601        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1602                   (sizeof(Element) * 8);
1603    '''
        # Add returning the high part: the BigElement sum is shifted right by
        # the bit width of Element before being stored in destElem.
1604    threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
1605    vraddhnCode = '''
1606        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1607                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1608                   (sizeof(Element) * 8);
1609    '''
        # Rounding variant of the add-high-part snippet: 1 << (bits - 1), i.e.
        # half of the discarded range, is added to the sum before the shift.
1610    threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1611
1612    vsubCode = '''
1613        destElem = srcElem1 - srcElem2;
1614    '''
        # Plain element-wise subtract at the element's own width.
1615    threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1616    threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
1617    vsublwCode = '''
1618        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1619    '''
        # Subtract with both operands first converted to BigElement. One
        # snippet serves both the long form (vsubl) and the wide form (vsubw).
1620    threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1621    threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1622
1623    vqaddUCode = '''
1624        destElem = srcElem1 + srcElem2;
1625        FPSCR fpscr = (FPSCR)Fpscr;
1626        if (destElem < srcElem1 || destElem < srcElem2) {
1627            destElem = (Element)(-1);
1628            fpscr.qc = 1;
1629        }
1630        Fpscr = fpscr;
1631    '''
        # Unsigned saturating add. Wrap-around is detected by the sum being
        # smaller than either operand; the result is then forced to all ones
        # and fpscr.qc is set. Fpscr is copied into a local, updated, and
        # written back on every execution.
1632    threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1633    threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
1634    vsubhnCode = '''
1635        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1636                   (sizeof(Element) * 8);
1637    '''
        # Subtract returning the high part: the BigElement difference is
        # shifted right by the bit width of Element.
1638    threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
1639    vrsubhnCode = '''
1640        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1641                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1642                   (sizeof(Element) * 8);
1643    '''
        # Rounding variant of the subtract-high-part snippet: 1 << (bits - 1)
        # is added to the difference before the shift.
1644    threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1645
1646    vqaddSCode = '''
1647        destElem = srcElem1 + srcElem2;
1648        FPSCR fpscr = (FPSCR)Fpscr;
1649        bool negDest = (destElem < 0);
1650        bool negSrc1 = (srcElem1 < 0);
1651        bool negSrc2 = (srcElem2 < 0);
1652        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1653            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1654            if (negDest)
1655                destElem -= 1;
1656            fpscr.qc = 1;
1657        }
1658        Fpscr = fpscr;
1659    '''
        # Signed saturating add. Overflow is flagged when both sources share a
        # sign and the wrapped sum has the opposite one. The result is then
        # replaced by 1 << (bits - 1); when the wrapped sum was negative (the
        # sources were non-negative) one is subtracted from that value.
        # fpscr.qc is set on saturation.
1660    threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1661    threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1662
1663    vqsubUCode = '''
1664        destElem = srcElem1 - srcElem2;
1665        FPSCR fpscr = (FPSCR)Fpscr;
1666        if (destElem > srcElem1) {
1667            destElem = 0;
1668            fpscr.qc = 1;
1669        }
1670        Fpscr = fpscr;
1671    '''
        # Unsigned saturating subtract. A difference larger than srcElem1
        # means the subtraction wrapped; the result is clamped to 0 and
        # fpscr.qc is set.
1672    threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1673    threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1674
1675    vqsubSCode = '''
1676        destElem = srcElem1 - srcElem2;
1677        FPSCR fpscr = (FPSCR)Fpscr;
1678        bool negDest = (destElem < 0);
1679        bool negSrc1 = (srcElem1 < 0);
1680        bool posSrc2 = (srcElem2 >= 0);
1681        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1682            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1683            if (negDest)
1684                destElem -= 1;
1685            fpscr.qc = 1;
1686        }
1687        Fpscr = fpscr;
1688    '''
        # Signed saturating subtract. Overflow is flagged when the sources have
        # opposite signs (negSrc1 == posSrc2) and the wrapped difference does
        # not share srcElem1's sign. The replacement value is built exactly as
        # in the signed saturating add: 1 << (bits - 1), minus one when the
        # wrapped result was negative. fpscr.qc is set on saturation.
1689    threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1690    threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1691
1692    vcgtCode = '''
1693        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1694    '''
        # Compare greater-than: all ones when srcElem1 > srcElem2, else 0.
1695    threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1696    threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1697
1698    vcgeCode = '''
1699        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1700    '''
        # Compare greater-or-equal: all ones when srcElem1 >= srcElem2, else 0.
1701    threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1702    threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1703
1704    vceqCode = '''
1705        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1706    '''
        # Compare equal: all ones when the elements match, else 0.
1707    threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1708    threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1709
1710    vshlCode = '''
1711        int16_t shiftAmt = (int8_t)srcElem2;
1712        if (shiftAmt < 0) {
1713            shiftAmt = -shiftAmt;
1714            if (shiftAmt >= sizeof(Element) * 8) {
1715                shiftAmt = sizeof(Element) * 8 - 1;
1716                destElem = 0;
1717            } else {
1718                destElem = (srcElem1 >> shiftAmt);
1719            }
1720            // Make sure the right shift sign extended when it should.
1721            if (srcElem1 < 0 && destElem >= 0) {
1722                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1723                                             1 - shiftAmt));
1724            }
1725        } else {
1726            if (shiftAmt >= sizeof(Element) * 8) {
1727                destElem = 0;
1728            } else {
1729                destElem = srcElem1 << shiftAmt;
1730            }
1731        }
1732    '''
        # Shift by a per-element signed count taken from srcElem2 truncated to
        # int8_t (held in an int16_t so that negating -128 is representable).
        # Negative counts shift right by their magnitude, others shift left.
        # Counts of at least the element width give 0 in both directions,
        # except that a negative srcElem1 shifted right is filled with ones by
        # the explicit sign-extension step after the shift.
1733    threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1734    threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1735
1736    vrshlCode = '''
1737        int16_t shiftAmt = (int8_t)srcElem2;
1738        if (shiftAmt < 0) {
1739            shiftAmt = -shiftAmt;
1740            Element rBit = 0;
1741            if (shiftAmt <= sizeof(Element) * 8)
1742                rBit = bits(srcElem1, shiftAmt - 1);
1743            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1744                rBit = 1;
1745            if (shiftAmt >= sizeof(Element) * 8) {
1746                shiftAmt = sizeof(Element) * 8 - 1;
1747                destElem = 0;
1748            } else {
1749                destElem = (srcElem1 >> shiftAmt);
1750            }
1751            // Make sure the right shift sign extended when it should.
1752            if (srcElem1 < 0 && destElem >= 0) {
1753                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1754                                             1 - shiftAmt));
1755            }
1756            destElem += rBit;
1757        } else if (shiftAmt > 0) {
1758            if (shiftAmt >= sizeof(Element) * 8) {
1759                destElem = 0;
1760            } else {
1761                destElem = srcElem1 << shiftAmt;
1762            }
1763        } else {
1764            destElem = srcElem1;
1765        }
1766    '''
        # Rounding shift by a signed per-element count (srcElem2 as int8_t).
        # On right shifts rBit -- bit (shiftAmt - 1) of srcElem1, the last bit
        # shifted out -- is added to the shifted, sign-extended value. For
        # counts beyond the element width rBit stays 0 unless srcElem1 is
        # negative, in which case it is forced to 1. Left shifts of at least
        # the element width give 0; a zero count copies srcElem1.
1767    threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1768    threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1769
1770    vqshlUCode = '''
1771        int16_t shiftAmt = (int8_t)srcElem2;
1772        FPSCR fpscr = (FPSCR)Fpscr;
1773        if (shiftAmt < 0) {
1774            shiftAmt = -shiftAmt;
1775            if (shiftAmt >= sizeof(Element) * 8) {
1776                shiftAmt = sizeof(Element) * 8 - 1;
1777                destElem = 0;
1778            } else {
1779                destElem = (srcElem1 >> shiftAmt);
1780            }
1781            // Make sure the right shift sign extended when it should.
1782            if (srcElem1 < 0 && destElem >= 0) {
1783                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1784                                             1 - shiftAmt));
1785            }
1786        } else if (shiftAmt > 0) {
1787            if (shiftAmt >= sizeof(Element) * 8) {
1788                if (srcElem1 != 0) {
1789                    destElem = mask(sizeof(Element) * 8);
1790                    fpscr.qc = 1;
1791                } else {
1792                    destElem = 0;
1793                }
1794            } else {
1795                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1796                            sizeof(Element) * 8 - shiftAmt)) {
1797                    destElem = mask(sizeof(Element) * 8);
1798                    fpscr.qc = 1;
1799                } else {
1800                    destElem = srcElem1 << shiftAmt;
1801                }
1802            }
1803        } else {
1804            destElem = srcElem1;
1805        }
1806        Fpscr = fpscr;
1807    '''
        # Saturating shift registered for unsignedTypes. Negative counts shift
        # right without saturating. Left shifts saturate to
        # mask(sizeof(Element) * 8) and set fpscr.qc when any bit that would
        # be shifted out is set; for counts of at least the element width that
        # means any non-zero srcElem1. A zero count copies srcElem1.
1808    threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1809    threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1810
1811    vqshlSCode = '''
1812        int16_t shiftAmt = (int8_t)srcElem2;
1813        FPSCR fpscr = (FPSCR)Fpscr;
1814        if (shiftAmt < 0) {
1815            shiftAmt = -shiftAmt;
1816            if (shiftAmt >= sizeof(Element) * 8) {
1817                shiftAmt = sizeof(Element) * 8 - 1;
1818                destElem = 0;
1819            } else {
1820                destElem = (srcElem1 >> shiftAmt);
1821            }
1822            // Make sure the right shift sign extended when it should.
1823            if (srcElem1 < 0 && destElem >= 0) {
1824                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1825                                             1 - shiftAmt));
1826            }
1827        } else if (shiftAmt > 0) {
1828            bool sat = false;
1829            if (shiftAmt >= sizeof(Element) * 8) {
1830                if (srcElem1 != 0)
1831                    sat = true;
1832                else
1833                    destElem = 0;
1834            } else {
1835                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1836                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1837                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1838                    sat = true;
1839                } else {
1840                    destElem = srcElem1 << shiftAmt;
1841                }
1842            }
1843            if (sat) {
1844                fpscr.qc = 1;
1845                destElem = mask(sizeof(Element) * 8 - 1);
1846                if (srcElem1 < 0)
1847                    destElem = ~destElem;
1848            }
1849        } else {
1850            destElem = srcElem1;
1851        }
1852        Fpscr = fpscr;
1853    '''
        # Saturating shift registered for signedTypes. Right shifts (negative
        # counts) are sign-extended and never saturate. A left shift saturates
        # when the top (shiftAmt + 1) bits of srcElem1 are not all copies of
        # its sign, or when the count is at least the element width and
        # srcElem1 is non-zero. The saturated value is
        # mask(sizeof(Element) * 8 - 1), complemented for a negative srcElem1,
        # and fpscr.qc is set. A zero count copies srcElem1.
1854    threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1855    threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1856
1857    vqrshlUCode = '''
1858        int16_t shiftAmt = (int8_t)srcElem2;
1859        FPSCR fpscr = (FPSCR)Fpscr;
1860        if (shiftAmt < 0) {
1861            shiftAmt = -shiftAmt;
1862            Element rBit = 0;
1863            if (shiftAmt <= sizeof(Element) * 8)
1864                rBit = bits(srcElem1, shiftAmt - 1);
1865            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1866                rBit = 1;
1867            if (shiftAmt >= sizeof(Element) * 8) {
1868                shiftAmt = sizeof(Element) * 8 - 1;
1869                destElem = 0;
1870            } else {
1871                destElem = (srcElem1 >> shiftAmt);
1872            }
1873            // Make sure the right shift sign extended when it should.
1874            if (srcElem1 < 0 && destElem >= 0) {
1875                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1876                                             1 - shiftAmt));
1877            }
1878            destElem += rBit;
1879        } else {
1880            if (shiftAmt >= sizeof(Element) * 8) {
1881                if (srcElem1 != 0) {
1882                    destElem = mask(sizeof(Element) * 8);
1883                    fpscr.qc = 1;
1884                } else {
1885                    destElem = 0;
1886                }
1887            } else {
1888                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1889                            sizeof(Element) * 8 - shiftAmt)) {
1890                    destElem = mask(sizeof(Element) * 8);
1891                    fpscr.qc = 1;
1892                } else {
1893                    destElem = srcElem1 << shiftAmt;
1894                }
1895            }
1896        }
1897        Fpscr = fpscr;
1898    '''
1899    threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1900    threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1901
1902    vqrshlSCode = '''
1903        int16_t shiftAmt = (int8_t)srcElem2;
1904        FPSCR fpscr = (FPSCR)Fpscr;
1905        if (shiftAmt < 0) {
1906            shiftAmt = -shiftAmt;
1907            Element rBit = 0;
1908            if (shiftAmt <= sizeof(Element) * 8)
1909                rBit = bits(srcElem1, shiftAmt - 1);
1910            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1911                rBit = 1;
1912            if (shiftAmt >= sizeof(Element) * 8) {
1913                shiftAmt = sizeof(Element) * 8 - 1;
1914                destElem = 0;
1915            } else {
1916                destElem = (srcElem1 >> shiftAmt);
1917            }
1918            // Make sure the right shift sign extended when it should.
1919            if (srcElem1 < 0 && destElem >= 0) {
1920                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1921                                             1 - shiftAmt));
1922            }
1923            destElem += rBit;
1924        } else if (shiftAmt > 0) {
1925            bool sat = false;
1926            if (shiftAmt >= sizeof(Element) * 8) {
1927                if (srcElem1 != 0)
1928                    sat = true;
1929                else
1930                    destElem = 0;
1931            } else {
1932                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1933                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1934                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1935                    sat = true;
1936                } else {
1937                    destElem = srcElem1 << shiftAmt;
1938                }
1939            }
1940            if (sat) {
1941                fpscr.qc = 1;
1942                destElem = mask(sizeof(Element) * 8 - 1);
1943                if (srcElem1 < 0)
1944                    destElem = ~destElem;
1945            }
1946        } else {
1947            destElem = srcElem1;
1948        }
1949        Fpscr = fpscr;
1950    '''
        # Saturating rounding shift registered for signedTypes. Right shifts
        # (negative counts) are sign-extended and then have rBit, the last bit
        # shifted out, added; for counts beyond the element width rBit is
        # forced to 1 when srcElem1 is negative. Left shifts saturate when the
        # top (shiftAmt + 1) bits are not all copies of the sign, or when the
        # count is at least the element width and srcElem1 is non-zero; the
        # result is then mask(sizeof(Element) * 8 - 1), complemented for a
        # negative srcElem1, and fpscr.qc is set. A zero count copies srcElem1.
1951    threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1952    threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1953
1954    vabaCode = '''
1955        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1956                                            (srcElem2 - srcElem1);
1957    '''
        # Absolute difference and accumulate: the larger operand minus the
        # smaller one is added to the existing destElem. The snippet reads
        # destElem, hence (presumably) the trailing True argument.
1958    threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1959    threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
1960    vabalCode = '''
1961        destElem += (srcElem1 > srcElem2) ?
1962            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1963            ((BigElement)srcElem2 - (BigElement)srcElem1);
1964    '''
        # Long form of absolute-difference-accumulate: the comparison is done
        # on the original elements, the subtraction on their BigElement
        # conversions, and the result is added to the existing destElem.
1965    threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1966
1967    vabdCode = '''
1968        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1969                                           (srcElem2 - srcElem1);
1970    '''
        # Absolute difference: the larger operand minus the smaller one.
1971    threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1972    threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
1973    vabdlCode = '''
1974        destElem = (srcElem1 > srcElem2) ?
1975            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1976            ((BigElement)srcElem2 - (BigElement)srcElem1);
1977    '''
        # Long form of absolute difference: operands are converted to
        # BigElement before the subtraction.
1978    threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1979
1980    vtstCode = '''
1981        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1982    '''
        # Test bits: all ones when the two elements share at least one set
        # bit, else 0.
1983    threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1984    threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1985
1986    vmulCode = '''
1987        destElem = srcElem1 * srcElem2;
1988    '''
        # Element-wise multiply at the element's own width. vmulCode is reused
        # below by the twoEqualRegInst "vmul" registrations.
1989    threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1990    threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
1991    vmullCode = '''
1992        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1993    '''
        # Long multiply: both operands are converted to BigElement first.
1994    threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1995
1996    vmlaCode = '''
1997        destElem = destElem + srcElem1 * srcElem2;
1998    '''
        # Multiply-accumulate: the product is added to the existing destElem.
        # Reads destElem, hence (presumably) the trailing True argument.
1999    threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
2000    threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
2001    vmlalCode = '''
2002        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2003    '''
        # Long multiply-accumulate: the BigElement product is added to the
        # existing destElem.
2004    threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
2005
2006    vqdmlalCode = '''
2007        FPSCR fpscr = (FPSCR)Fpscr;
2008        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2009        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2010        Element halfNeg = maxNeg / 2;
2011        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2012            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2013            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2014            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2015            fpscr.qc = 1;
2016        }
2017        bool negPreDest = (destElem < 0);
2018        destElem += midElem;
2019        bool negDest = (destElem < 0);
2020        bool negMid = (midElem < 0);
2021        if (negPreDest == negMid && negMid != negDest) {
2022            destElem = mask(sizeof(BigElement) * 8 - 1);
2023            if (negPreDest)
2024                destElem = ~destElem;
2025            fpscr.qc = 1;
2026        }
2027        Fpscr = fpscr;
2028    '''
2029    threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2030
2031    vqdmlslCode = '''
2032        FPSCR fpscr = (FPSCR)Fpscr;
2033        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2034        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2035        Element halfNeg = maxNeg / 2;
2036        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2037            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2038            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2039            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2040            fpscr.qc = 1;
2041        }
2042        bool negPreDest = (destElem < 0);
2043        destElem -= midElem;
2044        bool negDest = (destElem < 0);
2045        bool posMid = (midElem > 0);
2046        if (negPreDest == posMid && posMid != negDest) {
2047            destElem = mask(sizeof(BigElement) * 8 - 1);
2048            if (negPreDest)
2049                destElem = ~destElem;
2050            fpscr.qc = 1;
2051        }
2052        Fpscr = fpscr;
2053    '''
2054    threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2055
2056    vqdmullCode = '''
2057        FPSCR fpscr = (FPSCR)Fpscr;
2058        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2059        if (srcElem1 == srcElem2 &&
2060                srcElem1 == (Element)((Element)1 <<
2061                    (Element)(sizeof(Element) * 8 - 1))) {
2062            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2063            fpscr.qc = 1;
2064        }
2065        Fpscr = fpscr;
2066    '''
        # Saturating doubling multiply (long). The doubled product is computed
        # through int64_t. When both sources equal (Element)1 << (bits - 1)
        # the result is replaced by the complement of
        # (BigElement)srcElem1 << bits and fpscr.qc is set.
2067    threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2068
2069    vmlsCode = '''
2070        destElem = destElem - srcElem1 * srcElem2;
2071    '''
        # Multiply-subtract: the product is subtracted from the existing
        # destElem. Reads destElem, hence (presumably) the trailing True.
2072    threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2073    threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
2074    vmlslCode = '''
2075        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2076    '''
        # Long multiply-subtract: the BigElement product is subtracted from
        # the existing destElem.
2077    threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2078
2079    vmulpCode = '''
2080        destElem = 0;
2081        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2082            if (bits(srcElem2, j))
2083                destElem ^= srcElem1 << j;
2084        }
2085    '''
        # Polynomial multiply: for every set bit j of srcElem2, srcElem1
        # shifted left by j is XORed (not added) into the result. Registered
        # under the "vmul" mnemonic with the class names NVmulpD / NVmulpQ.
2086    threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2087    threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
2088    vmullpCode = '''
2089        destElem = 0;
2090        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2091            if (bits(srcElem2, j))
2092                destElem ^= (BigElement)srcElem1 << j;
2093        }
2094    '''
        # Long polynomial multiply: same XOR-of-shifted-copies loop, but
        # srcElem1 is converted to BigElement before shifting so the high
        # bits are kept.
2095    threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2096
        # Pairwise maximum / minimum reuse vmaxCode and vminCode (assigned
        # earlier in this file); only pairwise=True differs from the
        # vmax / vmin registrations.
2097    threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2098    threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2099
2100    threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2101    threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2102
2103    vqdmulhCode = '''
2104        FPSCR fpscr = (FPSCR)Fpscr;
2105        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2106                   (sizeof(Element) * 8);
2107        if (srcElem1 == srcElem2 &&
2108                srcElem1 == (Element)((Element)1 <<
2109                    (sizeof(Element) * 8 - 1))) {
2110            destElem = ~srcElem1;
2111            fpscr.qc = 1;
2112        }
2113        Fpscr = fpscr;
2114    '''
        # Saturating doubling multiply returning the high half: the doubled
        # int64_t product is shifted right by the element width. When both
        # sources equal (Element)1 << (bits - 1) the result is replaced by
        # ~srcElem1 and fpscr.qc is set.
2115    threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2116    threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2117
2118    vqrdmulhCode = '''
2119        FPSCR fpscr = (FPSCR)Fpscr;
2120        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2121                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2122                   (sizeof(Element) * 8);
2123        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2124        Element halfNeg = maxNeg / 2;
2125        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2126            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2127            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2128            if (destElem < 0) {
2129                destElem = mask(sizeof(Element) * 8 - 1);
2130            } else {
2131                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2132            }
2133            fpscr.qc = 1;
2134        }
2135        Fpscr = fpscr;
2136    '''
2137    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2138            smallSignedTypes, 2, vqrdmulhCode)
2139    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2140            smallSignedTypes, 4, vqrdmulhCode)
2141
2142    vmaxfpCode = '''
2143        FPSCR fpscr = (FPSCR)Fpscr;
2144        bool done;
2145        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2146        if (!done) {
2147            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2148                               true, true, VfpRoundNearest);
2149        } else if (flushToZero(srcReg1, srcReg2)) {
2150            fpscr.idc = 1;
2151        }
2152        Fpscr = fpscr;
2153    '''
        # Floating-point maximum on "float". processNans runs first and
        # reports through `done` whether it already produced the result. Only
        # when it did not is binaryOp invoked with fpMaxS. When it did, and
        # flushToZero(srcReg1, srcReg2) returns true, fpscr.idc is set.
        # processNans, binaryOp and flushToZero are defined outside this
        # chunk; the meaning of the literal `true` arguments must be checked
        # there.
2154    threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2155    threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2156
2157    vminfpCode = '''
2158        FPSCR fpscr = (FPSCR)Fpscr;
2159        bool done;
2160        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2161        if (!done) {
2162            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2163                               true, true, VfpRoundNearest);
2164        } else if (flushToZero(srcReg1, srcReg2)) {
2165            fpscr.idc = 1;
2166        }
2167        Fpscr = fpscr;
2168    '''
        # Floating-point minimum on "float": the same sequence as the
        # floating-point maximum snippet, with fpMinS passed to binaryOp.
2169    threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2170    threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2171
        # Pairwise floating-point max / min reuse vmaxfpCode and vminfpCode;
        # only pairwise=True differs from the registrations above.
2172    threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2173                        2, vmaxfpCode, pairwise=True)
2174    threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2175                        4, vmaxfpCode, pairwise=True)
2176
2177    threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2178                        2, vminfpCode, pairwise=True)
2179    threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2180                        4, vminfpCode, pairwise=True)
2181
2182    vaddfpCode = '''
2183        FPSCR fpscr = Fpscr;
2184        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2185                           true, true, VfpRoundNearest);
2186        Fpscr = fpscr;
2187    '''
        # Floating-point add on "float", delegated to binaryOp with fpAddS and
        # VfpRoundNearest. Fpscr is copied into a local that is handed to
        # binaryOp and written back afterwards.
2188    threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2189    threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2190
        # The pairwise floating-point add reuses vaddfpCode with
        # pairwise=True.
2191    threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2192                        2, vaddfpCode, pairwise=True)
2193    threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2194                        4, vaddfpCode, pairwise=True)
2195
2196    vsubfpCode = '''
2197        FPSCR fpscr = Fpscr;
2198        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2199                           true, true, VfpRoundNearest);
2200        Fpscr = fpscr;
2201    '''
        # Floating-point subtract on "float": binaryOp with fpSubS and
        # VfpRoundNearest, with Fpscr read before and written back after.
2202    threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2203    threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2204
2205    vmulfpCode = '''
2206        FPSCR fpscr = Fpscr;
2207        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2208                           true, true, VfpRoundNearest);
2209        Fpscr = fpscr;
2210    '''
        # Floating-point multiply on "float": binaryOp with fpMulS and
        # VfpRoundNearest. vmulfpCode is reused below by the twoEqualRegInstFp
        # "vmul" registrations.
2211    threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2212    threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2213
2214    vmlafpCode = '''
2215        FPSCR fpscr = Fpscr;
2216        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2217                             true, true, VfpRoundNearest);
2218        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2219                           true, true, VfpRoundNearest);
2220        Fpscr = fpscr;
2221    '''
        # Floating-point multiply-accumulate done as two separate binaryOp
        # calls: first the product (fpMulS) into `mid`, then `mid` plus the
        # existing destReg (fpAddS). Both calls share the same local fpscr.
        # Reads destReg, hence (presumably) the trailing True argument.
2222    threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2223    threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2224
2225    vmlsfpCode = '''
2226        FPSCR fpscr = Fpscr;
2227        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2228                             true, true, VfpRoundNearest);
2229        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2230                           true, true, VfpRoundNearest);
2231        Fpscr = fpscr;
2232    '''
        # Floating-point multiply-subtract done as two separate binaryOp
        # calls: first the product (fpMulS) into `mid`, then the existing
        # destReg minus `mid` (fpSubS). Reads destReg, hence (presumably) the
        # trailing True argument.
2233    threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2234    threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2235
2236    vcgtfpCode = '''
2237        FPSCR fpscr = (FPSCR)Fpscr;
2238        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2239                             true, true, VfpRoundNearest);
2240        destReg = (res == 0) ? -1 : 0;
2241        if (res == 2.0)
2242            fpscr.ioc = 1;
2243        Fpscr = fpscr;
2244    '''
        # Floating-point compare greater-than. binaryOp is run with vcgtFunc
        # (defined outside this chunk) and its float result is decoded here:
        # 0 stores -1 in destReg, anything else stores 0, and exactly 2.0
        # additionally sets fpscr.ioc. Registered with toInt = True.
2245    threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2246            2, vcgtfpCode, toInt = True)
2247    threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2248            4, vcgtfpCode, toInt = True)
2249
2250    vcgefpCode = '''
2251        FPSCR fpscr = (FPSCR)Fpscr;
2252        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2253                             true, true, VfpRoundNearest);
2254        destReg = (res == 0) ? -1 : 0;
2255        if (res == 2.0)
2256            fpscr.ioc = 1;
2257        Fpscr = fpscr;
2258    '''
        # Floating-point compare greater-or-equal. binaryOp is run with
        # vcgeFunc (defined outside this chunk); a result of 0 stores -1 in
        # destReg, anything else stores 0, and exactly 2.0 additionally sets
        # fpscr.ioc. Registered with toInt = True.
2259    threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2260            2, vcgefpCode, toInt = True)
2261    threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2262            4, vcgefpCode, toInt = True)
2263
2264    vacgtfpCode = '''
2265        FPSCR fpscr = (FPSCR)Fpscr;
2266        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2267                             true, true, VfpRoundNearest);
2268        destReg = (res == 0) ? -1 : 0;
2269        if (res == 2.0)
2270            fpscr.ioc = 1;
2271        Fpscr = fpscr;
2272    '''
        # "vacgt" compare. binaryOp is run with vacgtFunc (defined outside
        # this chunk; presumably it compares magnitudes -- confirm there). A
        # result of 0 stores -1 in destReg, anything else stores 0, and
        # exactly 2.0 additionally sets fpscr.ioc. Registered with
        # toInt = True.
2273    threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2274            2, vacgtfpCode, toInt = True)
2275    threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2276            4, vacgtfpCode, toInt = True)
2277
2278    vacgefpCode = '''
2279        FPSCR fpscr = (FPSCR)Fpscr;
2280        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2281                             true, true, VfpRoundNearest);
2282        destReg = (res == 0) ? -1 : 0;
2283        if (res == 2.0)
2284            fpscr.ioc = 1;
2285        Fpscr = fpscr;
2286    '''
        # "vacge" compare. binaryOp is run with vacgeFunc (defined outside
        # this chunk; presumably it compares magnitudes -- confirm there). A
        # result of 0 stores -1 in destReg, anything else stores 0, and
        # exactly 2.0 additionally sets fpscr.ioc. Registered with
        # toInt = True.
2287    threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2288            2, vacgefpCode, toInt = True)
2289    threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2290            4, vacgefpCode, toInt = True)
2291
2292    vceqfpCode = '''
2293        FPSCR fpscr = (FPSCR)Fpscr;
2294        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2295                             true, true, VfpRoundNearest);
2296        destReg = (res == 0) ? -1 : 0;
2297        if (res == 2.0)
2298            fpscr.ioc = 1;
2299        Fpscr = fpscr;
2300    '''
        # Floating-point compare equal. binaryOp is run with vceqFunc (defined
        # outside this chunk); a result of 0 stores -1 in destReg, anything
        # else stores 0, and exactly 2.0 additionally sets fpscr.ioc.
        # Registered with toInt = True.
2301    threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2302            2, vceqfpCode, toInt = True)
2303    threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2304            4, vceqfpCode, toInt = True)
2305
2306    vrecpsCode = '''
2307        FPSCR fpscr = Fpscr;
2308        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2309                           true, true, VfpRoundNearest);
2310        Fpscr = fpscr;
2311    '''
        # "vrecps": the whole computation is delegated to binaryOp with
        # fpRecpsS (defined outside this chunk) and VfpRoundNearest; Fpscr is
        # read before and written back after.
2312    threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2313    threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2314
2315    vrsqrtsCode = '''
2316        FPSCR fpscr = Fpscr;
2317        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2318                           true, true, VfpRoundNearest);
2319        Fpscr = fpscr;
2320    '''
        # "vrsqrts": the whole computation is delegated to binaryOp with
        # fpRSqrtsS (defined outside this chunk) and VfpRoundNearest; Fpscr is
        # read before and written back after.
2321    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2322    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2323
# vabd (floating point): subtract srcReg2 from srcReg1 with fpSubS through
# binaryOp, then store the absolute value (fabs) of that difference in
# destReg. FPSCR is written back after the subtraction.
# Registered as VabdDFp (count argument 2) and VabdQFp (count 4).
2324    vabdfpCode = '''
2325        FPSCR fpscr = Fpscr;
2326        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2327                             true, true, VfpRoundNearest);
2328        destReg = fabs(mid);
2329        Fpscr = fpscr;
2330    '''
2331    threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2332    threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2333
# vmla / vmlal registrations through the two-register helpers. The code
# strings vmlaCode, vmlafpCode and vmlalCode are not defined in this part of
# the file.
# NOTE(review): the trailing positional True presumably tells the helper that
# the destination is also read (accumulate) — confirm against the helpers.
# The integer forms here are registered for unsignedTypes only, whereas the
# vmls forms below use allTypes.
2334    twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2335    twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2336    twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2337    twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2338    twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2339
# vmls / vmlsl registrations through the two-register helpers. The code
# strings vmlsCode, vmlsfpCode and vmlslCode are not defined in this part of
# the file.
# NOTE(review): the trailing positional True presumably tells the helper that
# the destination is also read (accumulate) — confirm against the helpers.
2340    twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2341    twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2342    twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2343    twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2344    twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2345
# vmul / vmull registrations through the two-register helpers. The code
# strings vmulCode, vmulfpCode and vmullCode are not defined in this part of
# the file. Unlike the vmla / vmls registrations, no trailing True is passed.
2346    twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2347    twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2348    twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2349    twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2350    twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2351
# Saturating doubling multiply registrations: vqdmull, vqdmlal, vqdmlsl (long
# helper, smallTypes) and vqdmulh / vqrdmulh (equal-size helper,
# smallSignedTypes, D and Q variants). All code strings used here are defined
# outside this part of the file.
# NOTE(review): the trailing True on vqdmlal / vqdmlsl presumably marks the
# destination as also read (accumulate) — confirm against twoRegLongInst.
2352    twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2353    twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2354    twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2355    twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2356    twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2357    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2358            smallSignedTypes, 2, vqrdmulhCode)
2359    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2360            smallSignedTypes, 4, vqrdmulhCode)
2361
# vshr: shift each element right by the immediate. A shift count of at least
# the element width is handled explicitly: the result is -1 for a negative
# source and 0 otherwise; smaller counts use a plain `>>`.
# Registered for allTypes as NVshrD (count argument 2) and NVshrQ (count 4).
2362    vshrCode = '''
2363        if (imm >= sizeof(srcElem1) * 8) {
2364            if (srcElem1 < 0)
2365                destElem = -1;
2366            else
2367                destElem = 0;
2368        } else {
2369            destElem = srcElem1 >> imm;
2370        }
2371    '''
2372    twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2373    twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2374
2375    vsraCode = '''
2376        Element mid;;
2377        if (imm >= sizeof(srcElem1) * 8) {
2378            mid = (srcElem1 < 0) ? -1 : 0;
2379        } else {
2380            mid = srcElem1 >> imm;
2381            if (srcElem1 < 0 && mid >= 0) {
2382                mid |= -(mid & ((Element)1 <<
2383                            (sizeof(Element) * 8 - 1 - imm)));
2384            }
2385        }
2386        destElem += mid;
2387    '''
2388    twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2389    twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2390
# vrshr: rounding shift right by immediate.
#  - imm greater than the element width: result is 0.
#  - imm non-zero: bit (imm - 1) of the source is the rounding bit and is
#    added to the shifted value. The shift is done as (>> (imm - 1)) >> 1, so
#    imm equal to the element width never shifts by the full width at once.
#  - imm == 0: the source is copied unchanged.
# Registered for allTypes as NVrshrD (count argument 2) and NVrshrQ (count 4).
2391    vrshrCode = '''
2392        if (imm > sizeof(srcElem1) * 8) {
2393            destElem = 0;
2394        } else if (imm) {
2395            Element rBit = bits(srcElem1, imm - 1);
2396            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2397        } else {
2398            destElem = srcElem1;
2399        }
2400    '''
2401    twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2402    twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2403
# vrsra: rounding shift right by immediate, accumulated into destElem.
#  - imm greater than the element width: adds 0 (destination unchanged).
#  - imm non-zero: adds the source shifted by imm (done as (imm - 1) then 1)
#    plus the rounding bit taken from source bit (imm - 1).
#  - imm == 0: adds the unshifted source.
# Registered for allTypes as NVrsraD (count argument 2) and NVrsraQ (count 4).
# NOTE(review): the trailing True presumably marks the destination as also
# read — confirm against twoRegShiftInst.
2404    vrsraCode = '''
2405        if (imm > sizeof(srcElem1) * 8) {
2406            destElem += 0;
2407        } else if (imm) {
2408            Element rBit = bits(srcElem1, imm - 1);
2409            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2410        } else {
2411            destElem += srcElem1;
2412        }
2413    '''
2414    twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2415    twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2416
# vsri: shift right and insert. For imm below the element width the shifted
# source is ORed with the destination bits selected by
# ~mask(width - imm); assuming mask(n) sets the n low bits, that keeps the
# top imm bits of the previous destination. For imm >= width the destination
# is left as it was (the self-assignment is a deliberate no-op).
# Registered for unsignedTypes as NVsriD (count argument 2) and NVsriQ (4).
2417    vsriCode = '''
2418        if (imm >= sizeof(Element) * 8)
2419            destElem = destElem;
2420        else
2421            destElem = (srcElem1 >> imm) |
2422                (destElem & ~mask(sizeof(Element) * 8 - imm));
2423    '''
2424    twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2425    twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2426
# vshl (immediate): shift each element left by imm. When imm is at least the
# element width the shift is performed in two steps (width - 1, then 1)
# instead of a single shift by the full count.
# Registered for unsignedTypes as NVshlD (count argument 2) and NVshlQ (4).
2427    vshlCode = '''
2428        if (imm >= sizeof(Element) * 8)
2429            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2430        else
2431            destElem = srcElem1 << imm;
2432    '''
2433    twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2434    twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2435
# vsli: shift left and insert. For imm below the element width the shifted
# source is ORed with (destElem & mask(imm)); assuming mask(n) sets the n low
# bits, the low imm bits of the previous destination are preserved. For
# imm >= width the destination is left as it was (self-assignment no-op).
# Registered for unsignedTypes as NVsliD (count argument 2) and NVsliQ (4).
2436    vsliCode = '''
2437        if (imm >= sizeof(Element) * 8)
2438            destElem = destElem;
2439        else
2440            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2441    '''
2442    twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2443    twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2444
# vqshl (immediate, signed element types): saturating shift left.
#  - imm >= element width: any non-zero source saturates. The saturated value
#    starts as "only the top bit set" and is inverted for positive sources;
#    fpscr.qc is set. A zero source gives 0.
#  - 0 < imm < width: topBits holds source bits [width-1 : width-1-imm]. If
#    they are neither all zero nor equal to mask(imm + 1), the result
#    saturates the same way and fpscr.qc is set.
#  - imm == 0: the source is copied.
# Registered for signedTypes as NVqshlD (count argument 2) and NVqshlQ (4).
2445    vqshlCode = '''
2446        FPSCR fpscr = (FPSCR)Fpscr;
2447        if (imm >= sizeof(Element) * 8) {
2448            if (srcElem1 != 0) {
2449                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2450                if (srcElem1 > 0)
2451                    destElem = ~destElem;
2452                fpscr.qc = 1;
2453            } else {
2454                destElem = 0;
2455            }
2456        } else if (imm) {
2457            destElem = (srcElem1 << imm);
2458            uint64_t topBits = bits((uint64_t)srcElem1,
2459                                    sizeof(Element) * 8 - 1,
2460                                    sizeof(Element) * 8 - 1 - imm);
2461            if (topBits != 0 && topBits != mask(imm + 1)) {
2462                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2463                if (srcElem1 > 0)
2464                    destElem = ~destElem;
2465                fpscr.qc = 1;
2466            }
2467        } else {
2468            destElem = srcElem1;
2469        }
2470        Fpscr = fpscr;
2471    '''
2472    twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2473    twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2474
# "vqshlu" registration for unsigned element types: saturating shift left.
#  - imm >= element width: a non-zero source saturates to
#    mask(sizeof(Element) * 8) and sets fpscr.qc; zero stays 0.
#  - 0 < imm < width: topBits holds source bits [width-1 : width-imm], i.e.
#    the bits shifted out; if any is set the result saturates and qc is set.
#  - imm == 0: the source is copied.
# Registered as NVqshluD (count argument 2) and NVqshluQ (count 4).
2475    vqshluCode = '''
2476        FPSCR fpscr = (FPSCR)Fpscr;
2477        if (imm >= sizeof(Element) * 8) {
2478            if (srcElem1 != 0) {
2479                destElem = mask(sizeof(Element) * 8);
2480                fpscr.qc = 1;
2481            } else {
2482                destElem = 0;
2483            }
2484        } else if (imm) {
2485            destElem = (srcElem1 << imm);
2486            uint64_t topBits = bits((uint64_t)srcElem1,
2487                                    sizeof(Element) * 8 - 1,
2488                                    sizeof(Element) * 8 - imm);
2489            if (topBits != 0) {
2490                destElem = mask(sizeof(Element) * 8);
2491                fpscr.qc = 1;
2492            }
2493        } else {
2494            destElem = srcElem1;
2495        }
2496        Fpscr = fpscr;
2497    '''
2498    twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2499    twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2500
# "vqshlus" registration for signed element types: saturating shift left
# where negative inputs clamp to 0.
#  - Any negative source gives 0 and sets fpscr.qc, for every imm value
#    (including imm == 0).
#  - imm >= element width: a positive source saturates to
#    mask(sizeof(Element) * 8) with qc set; zero stays 0.
#  - 0 < imm < width: a non-negative source saturates to the same mask when
#    any of source bits [width-1 : width-imm] is set.
#  - imm == 0 and non-negative source: the source is copied.
# Registered as NVqshlusD (count argument 2) and NVqshlusQ (count 4).
2501    vqshlusCode = '''
2502        FPSCR fpscr = (FPSCR)Fpscr;
2503        if (imm >= sizeof(Element) * 8) {
2504            if (srcElem1 < 0) {
2505                destElem = 0;
2506                fpscr.qc = 1;
2507            } else if (srcElem1 > 0) {
2508                destElem = mask(sizeof(Element) * 8);
2509                fpscr.qc = 1;
2510            } else {
2511                destElem = 0;
2512            }
2513        } else if (imm) {
2514            destElem = (srcElem1 << imm);
2515            uint64_t topBits = bits((uint64_t)srcElem1,
2516                                    sizeof(Element) * 8 - 1,
2517                                    sizeof(Element) * 8 - imm);
2518            if (srcElem1 < 0) {
2519                destElem = 0;
2520                fpscr.qc = 1;
2521            } else if (topBits != 0) {
2522                destElem = mask(sizeof(Element) * 8);
2523                fpscr.qc = 1;
2524            }
2525        } else {
2526            if (srcElem1 < 0) {
2527                fpscr.qc = 1;
2528                destElem = 0;
2529            } else {
2530                destElem = srcElem1;
2531            }
2532        }
2533        Fpscr = fpscr;
2534    '''
2535    twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2536    twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2537
# vshrn: shift right by immediate through the narrowing-shift helper. A shift
# count of at least the source element width yields 0; otherwise the source
# is shifted with `>>` and assigned to destElem.
# NOTE(review): the narrowing itself (srcElem1 wider than destElem) is
# presumably arranged by twoRegNarrowShiftInst — not visible here.
2538    vshrnCode = '''
2539        if (imm >= sizeof(srcElem1) * 8) {
2540            destElem = 0;
2541        } else {
2542            destElem = srcElem1 >> imm;
2543        }
2544    '''
2545    twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2546
# vrshrn: rounding shift right by immediate through the narrowing-shift
# helper.
#  - imm greater than the source element width: result is 0.
#  - imm non-zero: source bit (imm - 1) is added as a rounding bit to the
#    value shifted by imm (performed as (imm - 1) then 1).
#  - imm == 0: the source is assigned unchanged.
2547    vrshrnCode = '''
2548        if (imm > sizeof(srcElem1) * 8) {
2549            destElem = 0;
2550        } else if (imm) {
2551            Element rBit = bits(srcElem1, imm - 1);
2552            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2553        } else {
2554            destElem = srcElem1;
2555        }
2556    '''
2557    twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2558
# vqshrn (signed element types): saturating shift right and narrow.
#  - imm greater than the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - imm non-zero: the source is shifted (as (imm - 1) then 1) into a
#    BigElement and sign-extended by hand from bit
#    (sizeof(BigElement) * 8 - 1 - imm). If the value does not survive a cast
#    to Element, the result saturates to mask(width - 1), inverted for
#    negative sources, and qc is set.
#  - imm == 0: the source is assigned directly, without a saturation check.
2559    vqshrnCode = '''
2560        FPSCR fpscr = (FPSCR)Fpscr;
2561        if (imm > sizeof(srcElem1) * 8) {
2562            if (srcElem1 != 0 && srcElem1 != -1)
2563                fpscr.qc = 1;
2564            destElem = 0;
2565        } else if (imm) {
2566            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2567            mid |= -(mid & ((BigElement)1 <<
2568                        (sizeof(BigElement) * 8 - 1 - imm)));
2569            if (mid != (Element)mid) {
2570                destElem = mask(sizeof(Element) * 8 - 1);
2571                if (srcElem1 < 0)
2572                    destElem = ~destElem;
2573                fpscr.qc = 1;
2574            } else {
2575                destElem = mid;
2576            }
2577        } else {
2578            destElem = srcElem1;
2579        }
2580        Fpscr = fpscr;
2581    '''
2582    twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2583
# "vqshrun" registration for unsigned element types: saturating shift right
# and narrow.
#  - imm greater than the source width: result 0; fpscr.qc set for any
#    non-zero source.
#  - imm non-zero: the shifted value is held in a BigElement; if it does not
#    survive a cast to Element the result saturates to
#    mask(sizeof(Element) * 8) and qc is set.
#  - imm == 0: the source is assigned directly, without a saturation check.
2584    vqshrunCode = '''
2585        FPSCR fpscr = (FPSCR)Fpscr;
2586        if (imm > sizeof(srcElem1) * 8) {
2587            if (srcElem1 != 0)
2588                fpscr.qc = 1;
2589            destElem = 0;
2590        } else if (imm) {
2591            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2592            if (mid != (Element)mid) {
2593                destElem = mask(sizeof(Element) * 8);
2594                fpscr.qc = 1;
2595            } else {
2596                destElem = mid;
2597            }
2598        } else {
2599            destElem = srcElem1;
2600        }
2601        Fpscr = fpscr;
2602    '''
2603    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2604            smallUnsignedTypes, vqshrunCode)
2605
# "vqshrun" registration for signed element types (class NVqshruns):
# saturating shift right and narrow where negative overflow clamps to 0.
#  - imm greater than the source width: result 0; fpscr.qc set for any
#    non-zero source.
#  - imm non-zero: if any bit of the shifted value at or above bit
#    sizeof(Element) * 8 is set, the result is 0 for a negative source and
#    mask(sizeof(Element) * 8) otherwise, with qc set.
#  - imm == 0: the source is assigned directly, without a saturation check.
2606    vqshrunsCode = '''
2607        FPSCR fpscr = (FPSCR)Fpscr;
2608        if (imm > sizeof(srcElem1) * 8) {
2609            if (srcElem1 != 0)
2610                fpscr.qc = 1;
2611            destElem = 0;
2612        } else if (imm) {
2613            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2614            if (bits(mid, sizeof(BigElement) * 8 - 1,
2615                          sizeof(Element) * 8) != 0) {
2616                if (srcElem1 < 0) {
2617                    destElem = 0;
2618                } else {
2619                    destElem = mask(sizeof(Element) * 8);
2620                }
2621                fpscr.qc = 1;
2622            } else {
2623                destElem = mid;
2624            }
2625        } else {
2626            destElem = srcElem1;
2627        }
2628        Fpscr = fpscr;
2629    '''
2630    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2631            smallSignedTypes, vqshrunsCode)
2632
# vqrshrn (signed element types): saturating rounding shift right and narrow.
#  - imm greater than the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - imm non-zero: the source is shifted by (imm - 1), its low bit is kept as
#    the rounding bit, the value is shifted once more, sign-extended by hand
#    from bit (sizeof(BigElement) * 8 - 1 - imm) and the rounding bit added.
#    A value that does not survive a cast to Element saturates to
#    mask(width - 1), inverted for negative sources, and sets qc.
#  - imm == 0: the unshifted source goes through the same saturation check.
2633    vqrshrnCode = '''
2634        FPSCR fpscr = (FPSCR)Fpscr;
2635        if (imm > sizeof(srcElem1) * 8) {
2636            if (srcElem1 != 0 && srcElem1 != -1)
2637                fpscr.qc = 1;
2638            destElem = 0;
2639        } else if (imm) {
2640            BigElement mid = (srcElem1 >> (imm - 1));
2641            uint64_t rBit = mid & 0x1;
2642            mid >>= 1;
2643            mid |= -(mid & ((BigElement)1 <<
2644                        (sizeof(BigElement) * 8 - 1 - imm)));
2645            mid += rBit;
2646            if (mid != (Element)mid) {
2647                destElem = mask(sizeof(Element) * 8 - 1);
2648                if (srcElem1 < 0)
2649                    destElem = ~destElem;
2650                fpscr.qc = 1;
2651            } else {
2652                destElem = mid;
2653            }
2654        } else {
2655            if (srcElem1 != (Element)srcElem1) {
2656                destElem = mask(sizeof(Element) * 8 - 1);
2657                if (srcElem1 < 0)
2658                    destElem = ~destElem;
2659                fpscr.qc = 1;
2660            } else {
2661                destElem = srcElem1;
2662            }
2663        }
2664        Fpscr = fpscr;
2665    '''
2666    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2667            smallSignedTypes, vqrshrnCode)
2668
2669    vqrshrunCode = '''
2670        FPSCR fpscr = (FPSCR)Fpscr;
2671        if (imm > sizeof(srcElem1) * 8) {
2672            if (srcElem1 != 0)
2673                fpscr.qc = 1;
2674            destElem = 0;
2675        } else if (imm) {
2676            BigElement mid = (srcElem1 >> (imm - 1));
2677            uint64_t rBit = mid & 0x1;
2678            mid >>= 1;
2679            mid += rBit;
2680            if (mid != (Element)mid) {
2681                destElem = mask(sizeof(Element) * 8);
2682                fpscr.qc = 1;
2683            } else {
2684                destElem = mid;
2685            }
2686        } else {
2687            if (srcElem1 != (Element)srcElem1) {
2688                destElem = mask(sizeof(Element) * 8 - 1);
2689                if (srcElem1 < 0)
2690                    destElem = ~destElem;
2691                fpscr.qc = 1;
2692            } else {
2693                destElem = srcElem1;
2694            }
2695        }
2696        Fpscr = fpscr;
2697    '''
2698    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2699            smallUnsignedTypes, vqrshrunCode)
2700
# "vqrshrun" registration for signed element types (class NVqrshruns):
# saturating rounding shift right and narrow where negative values clamp to 0.
#  - imm greater than the source width: result 0; fpscr.qc set for any
#    non-zero source.
#  - imm non-zero: shift by (imm - 1), keep the low bit as rounding bit, shift
#    once more, sign-extend by hand from bit
#    (sizeof(BigElement) * 8 - 1 - imm), add the rounding bit. If any bit at
#    or above bit sizeof(Element) * 8 is then set, the result is 0 for a
#    negative source and mask(sizeof(Element) * 8) otherwise, with qc set.
#  - imm == 0: a negative source gives 0 with qc set; anything else is copied.
2701    vqrshrunsCode = '''
2702        FPSCR fpscr = (FPSCR)Fpscr;
2703        if (imm > sizeof(srcElem1) * 8) {
2704            if (srcElem1 != 0)
2705                fpscr.qc = 1;
2706            destElem = 0;
2707        } else if (imm) {
2708            BigElement mid = (srcElem1 >> (imm - 1));
2709            uint64_t rBit = mid & 0x1;
2710            mid >>= 1;
2711            mid |= -(mid & ((BigElement)1 <<
2712                            (sizeof(BigElement) * 8 - 1 - imm)));
2713            mid += rBit;
2714            if (bits(mid, sizeof(BigElement) * 8 - 1,
2715                          sizeof(Element) * 8) != 0) {
2716                if (srcElem1 < 0) {
2717                    destElem = 0;
2718                } else {
2719                    destElem = mask(sizeof(Element) * 8);
2720                }
2721                fpscr.qc = 1;
2722            } else {
2723                destElem = mid;
2724            }
2725        } else {
2726            if (srcElem1 < 0) {
2727                fpscr.qc = 1;
2728                destElem = 0;
2729            } else {
2730                destElem = srcElem1;
2731            }
2732        }
2733        Fpscr = fpscr;
2734    '''
2735    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2736            smallSignedTypes, vqrshrunsCode)
2737
# vshll: shift left long. The source element is cast to BigElement before the
# shift so the widened result keeps the shifted-out bits; a shift count of at
# least the destination element width yields 0.
# Registered through twoRegLongShiftInst for smallTypes.
2738    vshllCode = '''
2739        if (imm >= sizeof(destElem) * 8) {
2740            destElem = 0;
2741        } else {
2742            destElem = (BigElement)srcElem1 << imm;
2743        }
2744    '''
2745    twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2746
# vmovl: plain element copy, registered through the same long-shift helper
# as vshll (twoRegLongShiftInst) for smallTypes.
# NOTE(review): the widening is presumably provided by that helper's operand
# types — not visible here.
2747    vmovlCode = '''
2748        destElem = srcElem1;
2749    '''
2750    twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2751
# vcvt, float to fixed point with the second vfpFpSToFixed argument false
# (the signed counterpart vcvt2sfxCode passes true there).
# Sets fpscr.idc when flushToZero(srcElem1) reports a flush, then converts
# between prepFpState(VfpRoundNearest) and finishVfp, passing imm through to
# vfpFpSToFixed.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers that keep the conversion between the
# prepFpState / finishVfp calls — confirm.
2752    vcvt2ufxCode = '''
2753        FPSCR fpscr = Fpscr;
2754        if (flushToZero(srcElem1))
2755            fpscr.idc = 1;
2756        VfpSavedState state = prepFpState(VfpRoundNearest);
2757        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2758        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2759        __asm__ __volatile__("" :: "m" (destReg));
2760        finishVfp(fpscr, state, true);
2761        Fpscr = fpscr;
2762    '''
2763    twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2764            2, vcvt2ufxCode, toInt = True)
2765    twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2766            4, vcvt2ufxCode, toInt = True)
2767
# vcvt, float to fixed point with the second vfpFpSToFixed argument true
# (the unsigned counterpart vcvt2ufxCode passes false there).
# Sets fpscr.idc when flushToZero(srcElem1) reports a flush, then converts
# between prepFpState(VfpRoundNearest) and finishVfp, passing imm through to
# vfpFpSToFixed.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers that keep the conversion between the
# prepFpState / finishVfp calls — confirm.
2768    vcvt2sfxCode = '''
2769        FPSCR fpscr = Fpscr;
2770        if (flushToZero(srcElem1))
2771            fpscr.idc = 1;
2772        VfpSavedState state = prepFpState(VfpRoundNearest);
2773        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2774        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2775        __asm__ __volatile__("" :: "m" (destReg));
2776        finishVfp(fpscr, state, true);
2777        Fpscr = fpscr;
2778    '''
2779    twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2780            2, vcvt2sfxCode, toInt = True)
2781    twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2782            4, vcvt2sfxCode, toInt = True)
2783
# vcvt, fixed point to float via vfpUFixedToFpS. The conversion runs between
# prepFpState(VfpRoundNearest) and finishVfp, with imm passed through to the
# conversion routine; FPSCR is written back afterwards.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers around the conversion — confirm.
# Registered with fromInt = True as NVcvtu2fpD (count 2) and NVcvtu2fpQ (4).
2784    vcvtu2fpCode = '''
2785        FPSCR fpscr = Fpscr;
2786        VfpSavedState state = prepFpState(VfpRoundNearest);
2787        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2788        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2789        __asm__ __volatile__("" :: "m" (destElem));
2790        finishVfp(fpscr, state, true);
2791        Fpscr = fpscr;
2792    '''
2793    twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2794            2, vcvtu2fpCode, fromInt = True)
2795    twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2796            4, vcvtu2fpCode, fromInt = True)
2797
# vcvt, fixed point to float via vfpSFixedToFpS. The conversion runs between
# prepFpState(VfpRoundNearest) and finishVfp, with imm passed through to the
# conversion routine; FPSCR is written back afterwards.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers around the conversion — confirm.
# Registered with fromInt = True as NVcvts2fpD (count 2) and NVcvts2fpQ (4).
2798    vcvts2fpCode = '''
2799        FPSCR fpscr = Fpscr;
2800        VfpSavedState state = prepFpState(VfpRoundNearest);
2801        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2802        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2803        __asm__ __volatile__("" :: "m" (destElem));
2804        finishVfp(fpscr, state, true);
2805        Fpscr = fpscr;
2806    '''
2807    twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2808            2, vcvts2fpCode, fromInt = True)
2809    twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2810            4, vcvts2fpCode, fromInt = True)
2811
# vcvt through the narrowing helper using vcvtFpSFpH: the source element bits
# are turned into a float with bitsToFp, fpscr.idc is set when flushToZero
# reports a flush, and the conversion result (controlled by fpscr.ahp) is
# stored in destElem. Registered for element type uint16_t.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers around the conversion — confirm.
2812    vcvts2hCode = '''
2813        FPSCR fpscr = Fpscr;
2814        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2815        if (flushToZero(srcFp1))
2816            fpscr.idc = 1;
2817        VfpSavedState state = prepFpState(VfpRoundNearest);
2818        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2819                                : "m" (srcFp1), "m" (destElem));
2820        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2821                              fpscr.ahp, srcFp1);
2822        __asm__ __volatile__("" :: "m" (destElem));
2823        finishVfp(fpscr, state, true);
2824        Fpscr = fpscr;
2825    '''
2826    twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2827
# vcvt through the long helper using vcvtFpHFpS: the source element is
# converted (controlled by fpscr.ahp) and the resulting float is stored as
# raw bits via fpToBits. Registered for element type uint16_t.
# NOTE(review): the empty __asm__ statements with "m" constraints are
# presumably compiler barriers around the conversion — confirm.
2828    vcvth2sCode = '''
2829        FPSCR fpscr = Fpscr;
2830        VfpSavedState state = prepFpState(VfpRoundNearest);
2831        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2832                                : "m" (srcElem1), "m" (destElem));
2833        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2834        __asm__ __volatile__("" :: "m" (destElem));
2835        finishVfp(fpscr, state, true);
2836        Fpscr = fpscr;
2837    '''
2838    twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2839
# vrsqrte (integer form): each element is passed through
# unsignedRSqrtEstimate, which is defined outside this excerpt.
# Registered for uint32_t only, as NVrsqrteD (count 2) and NVrsqrteQ (4).
2840    vrsqrteCode = '''
2841        destElem = unsignedRSqrtEstimate(srcElem1);
2842    '''
2843    twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2844    twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2845
# vrsqrte (floating-point form): sets fpscr.idc when flushToZero(srcReg1)
# reports a flush, then stores fprSqrtEstimate(fpscr, srcReg1) in destReg and
# writes FPSCR back. fprSqrtEstimate is defined outside this excerpt.
# Registered as NVrsqrteDFp (count argument 2) and NVrsqrteQFp (count 4).
2846    vrsqrtefpCode = '''
2847        FPSCR fpscr = Fpscr;
2848        if (flushToZero(srcReg1))
2849            fpscr.idc = 1;
2850        destReg = fprSqrtEstimate(fpscr, srcReg1);
2851        Fpscr = fpscr;
2852    '''
2853    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2854    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2855
# vrecpe (integer form): each element is passed through
# unsignedRecipEstimate, which is defined outside this excerpt.
# Registered for uint32_t only, as NVrecpeD (count 2) and NVrecpeQ (4).
2856    vrecpeCode = '''
2857        destElem = unsignedRecipEstimate(srcElem1);
2858    '''
2859    twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2860    twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2861
# vrecpe (floating-point form): sets fpscr.idc when flushToZero(srcReg1)
# reports a flush, then stores fpRecipEstimate(fpscr, srcReg1) in destReg and
# writes FPSCR back. fpRecipEstimate is defined outside this excerpt.
# Registered as NVrecpeDFp (count argument 2) and NVrecpeQFp (count 4).
2862    vrecpefpCode = '''
2863        FPSCR fpscr = Fpscr;
2864        if (flushToZero(srcReg1))
2865            fpscr.idc = 1;
2866        destReg = fpRecipEstimate(fpscr, srcReg1);
2867        Fpscr = fpscr;
2868    '''
2869    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2870    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2871
# vrev16 / vrev32 / vrev64: reverse the order of elements inside groups of
# 2, 4 or 8 bytes. Each snippet copies the element and computes
# groupSize = (group bytes) / sizeof(Element); XOR-ing the element index i
# with (groupSize - 1) gives j, the mirrored position inside the group.
# NOTE(review): i and j are not declared in these snippets; they are
# presumably the source / destination indices of a loop generated by
# twoRegMiscInst — confirm against the helper.
# The registered element types are limited to sizes smaller than the group:
# uint8_t for vrev16, uint8_t / uint16_t for vrev32, smallUnsignedTypes for
# vrev64.
2872    vrev16Code = '''
2873        destElem = srcElem1;
2874        unsigned groupSize = ((1 << 1) / sizeof(Element));
2875        unsigned reverseMask = (groupSize - 1);
2876        j = i ^ reverseMask;
2877    '''
2878    twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2879    twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
2880    vrev32Code = '''
2881        destElem = srcElem1;
2882        unsigned groupSize = ((1 << 2) / sizeof(Element));
2883        unsigned reverseMask = (groupSize - 1);
2884        j = i ^ reverseMask;
2885    '''
2886    twoRegMiscInst("vrev32", "NVrev32D",
2887            ("uint8_t", "uint16_t"), 2, vrev32Code)
2888    twoRegMiscInst("vrev32", "NVrev32Q",
2889            ("uint8_t", "uint16_t"), 4, vrev32Code)
2890    vrev64Code = '''
2891        destElem = srcElem1;
2892        unsigned groupSize = ((1 << 3) / sizeof(Element));
2893        unsigned reverseMask = (groupSize - 1);
2894        j = i ^ reverseMask;
2895    '''
2896    twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2897    twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2898
# vpaddl: add the two source elements after casting both to BigElement, so
# the sum is formed at the wider width.
# NOTE(review): srcElem1 / srcElem2 are presumably an adjacent pair supplied
# by twoRegCondenseInst — confirm against the helper.
# Registered for smallTypes as NVpaddlD (count 2) and NVpaddlQ (count 4).
2899    vpaddlCode = '''
2900        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2901    '''
2902    twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2903    twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2904
# vpadal: like vpaddl, but the BigElement-width sum of the two source
# elements is added to the existing destination element.
# NOTE(review): the trailing True presumably marks the destination as also
# read — confirm against twoRegCondenseInst.
# Registered for smallTypes as NVpadalD (count 2) and NVpadalQ (count 4).
2905    vpadalCode = '''
2906        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2907    '''
2908    twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2909    twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2910
# vcls: count how many bits directly below the top bit match the top bit.
# The source is shifted left once to drop the top bit, then shifted further
# while the new top bit still matches the original sign (negative branch:
# value stays < 0; non-negative branch: value stays >= 0). The count is
# capped at width - 1.
# Registered for signedTypes as NVclsD (count 2) and NVclsQ (count 4).
2911    vclsCode = '''
2912        unsigned count = 0;
2913        if (srcElem1 < 0) {
2914            srcElem1 <<= 1;
2915            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2916                count++;
2917                srcElem1 <<= 1;
2918            }
2919        } else {
2920            srcElem1 <<= 1;
2921            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2922                count++;
2923                srcElem1 <<= 1;
2924            }
2925        }
2926        destElem = count;
2927    '''
2928    twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2929    twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2930
# vclz: count leading zero bits. The element is shifted left until it turns
# negative (top bit set) or the count reaches the element width, so an
# all-zero element yields the full width. The sign test is why this is
# registered with signedTypes.
# Registered as NVclzD (count argument 2) and NVclzQ (count 4).
2931    vclzCode = '''
2932        unsigned count = 0;
2933        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2934            count++;
2935            srcElem1 <<= 1;
2936        }
2937        destElem = count;
2938    '''
2939    twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2940    twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2941
# vcnt: count the set bits of each element by adding the low bit and shifting
# right until the value is zero (or the element width has been consumed).
# Registered for unsignedTypes as NVcntD (count 2) and NVcntQ (count 4).
2942    vcntCode = '''
2943        unsigned count = 0;
2944        while (srcElem1 && count < sizeof(Element) * 8) {
2945            count += srcElem1 & 0x1;
2946            srcElem1 >>= 1;
2947        }
2948        destElem = count;
2949    '''
2950    twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2951    twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2952
# vmvn: bitwise NOT of each element. Registered only for uint64_t elements,
# as NVmvnD (count argument 2) and NVmvnQ (count 4).
2953    vmvnCode = '''
2954        destElem = ~srcElem1;
2955    '''
2956    twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2957    twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2958
# vqabs: saturating absolute value. A source equal to "only the top bit set"
# (the one value whose negation does not fit) sets fpscr.qc and produces
# ~srcElem1; other negative sources are negated and non-negative sources are
# copied.
# Registered for signedTypes as NVqabsD (count 2) and NVqabsQ (count 4).
2959    vqabsCode = '''
2960        FPSCR fpscr = (FPSCR)Fpscr;
2961        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2962            fpscr.qc = 1;
2963            destElem = ~srcElem1;
2964        } else if (srcElem1 < 0) {
2965            destElem = -srcElem1;
2966        } else {
2967            destElem = srcElem1;
2968        }
2969        Fpscr = fpscr;
2970    '''
2971    twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2972    twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2973
# vqneg: saturating negate. A source equal to "only the top bit set" (the one
# value whose negation does not fit) sets fpscr.qc and produces ~srcElem1;
# every other source is simply negated.
# Registered for signedTypes as NVqnegD (count 2) and NVqnegQ (count 4).
2974    vqnegCode = '''
2975        FPSCR fpscr = (FPSCR)Fpscr;
2976        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2977            fpscr.qc = 1;
2978            destElem = ~srcElem1;
2979        } else {
2980            destElem = -srcElem1;
2981        }
2982        Fpscr = fpscr;
2983    '''
2984    twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2985    twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2986
# vabs, integer and floating-point forms.
#  - vabsCode: negative elements are negated, others copied (no saturation,
#    unlike vqabs).
#  - vabsfpCode: the float is viewed as a uint32_t through a union and ANDed
#    with mask(sizeof(Element) * 8 - 1); assuming mask(n) sets the n low bits,
#    this clears the top (sign) bit without any arithmetic on the value.
# Each form is registered as a D (count argument 2) and Q (count 4) variant.
2987    vabsCode = '''
2988        if (srcElem1 < 0) {
2989            destElem = -srcElem1;
2990        } else {
2991            destElem = srcElem1;
2992        }
2993    '''
2994    twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2995    twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
2996    vabsfpCode = '''
2997        union
2998        {
2999            uint32_t i;
3000            float f;
3001        } cStruct;
3002        cStruct.f = srcReg1;
3003        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3004        destReg = cStruct.f;
3005    '''
3006    twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
3007    twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
3008
# vneg, integer and floating-point forms: both simply apply unary minus
# (destElem = -srcElem1 for signedTypes, destReg = -srcReg1 for float).
# Neither snippet touches FPSCR.
# Each form is registered as a D (count argument 2) and Q (count 4) variant.
3009    vnegCode = '''
3010        destElem = -srcElem1;
3011    '''
3012    twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
3013    twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
3014    vnegfpCode = '''
3015        destReg = -srcReg1;
3016    '''
3017    twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
3018    twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3019
# vcgt against zero (two-register misc form).
#  - Integer: destElem is mask(sizeof(Element) * 8) when srcElem1 > 0, else 0.
#  - Floating point: vcgtFunc is run on srcReg1 and 0.0 through binaryOp;
#    destReg is -1 when the result is 0, else 0, and a result of exactly 2.0
#    sets fpscr.ioc.
# NOTE(review): vcgtFunc is defined outside this excerpt; its return
# convention is inferred from the checks below.
# This rebinds vcgtfpCode if an earlier assignment of that name exists in the
# file, so the registrations must stay directly after the assignment.
3020    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3021    twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3022    twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
3023    vcgtfpCode = '''
3024        FPSCR fpscr = (FPSCR)Fpscr;
3025        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3026                             true, true, VfpRoundNearest);
3027        destReg = (res == 0) ? -1 : 0;
3028        if (res == 2.0)
3029            fpscr.ioc = 1;
3030        Fpscr = fpscr;
3031    '''
3032    twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3033            2, vcgtfpCode, toInt = True)
3034    twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3035            4, vcgtfpCode, toInt = True)
3036
# vcge against zero (two-register misc form).
#  - Integer: destElem is mask(sizeof(Element) * 8) when srcElem1 >= 0,
#    else 0.
#  - Floating point: vcgeFunc is run on srcReg1 and 0.0 through binaryOp;
#    destReg is -1 when the result is 0, else 0, and a result of exactly 2.0
#    sets fpscr.ioc.
# NOTE(review): vcgeFunc is defined outside this excerpt; its return
# convention is inferred from the checks below.
3037    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3038    twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3039    twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
3040    vcgefpCode = '''
3041        FPSCR fpscr = (FPSCR)Fpscr;
3042        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3043                             true, true, VfpRoundNearest);
3044        destReg = (res == 0) ? -1 : 0;
3045        if (res == 2.0)
3046            fpscr.ioc = 1;
3047        Fpscr = fpscr;
3048    '''
3049    twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3050            2, vcgefpCode, toInt = True)
3051    twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3052            4, vcgefpCode, toInt = True)
3053
# vceq against zero (two-register misc form).
#  - Integer: destElem is mask(sizeof(Element) * 8) when srcElem1 == 0,
#    else 0.
#  - Floating point: vceqFunc is run on srcReg1 and 0.0 through binaryOp;
#    destReg is -1 when the result is 0, else 0, and a result of exactly 2.0
#    sets fpscr.ioc.
# This assignment rebinds vceqfpCode, which the file also assigns earlier for
# the register-register vceq; the registrations below therefore pick up the
# compare-with-zero snippet.
3054    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3055    twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3056    twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
3057    vceqfpCode = '''
3058        FPSCR fpscr = (FPSCR)Fpscr;
3059        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3060                             true, true, VfpRoundNearest);
3061        destReg = (res == 0) ? -1 : 0;
3062        if (res == 2.0)
3063            fpscr.ioc = 1;
3064        Fpscr = fpscr;
3065    '''
3066    twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3067            2, vceqfpCode, toInt = True)
3068    twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3069            4, vceqfpCode, toInt = True)
3070
# vcle against zero (two-register misc form).
#  - Integer: destElem is mask(sizeof(Element) * 8) when srcElem1 <= 0,
#    else 0.
#  - Floating point: vcleFunc is run on srcReg1 and 0.0 through binaryOp;
#    destReg is -1 when the result is 0, else 0, and a result of exactly 2.0
#    sets fpscr.ioc.
# NOTE(review): vcleFunc is defined outside this excerpt; its return
# convention is inferred from the checks below.
3071    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3072    twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3073    twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
3074    vclefpCode = '''
3075        FPSCR fpscr = (FPSCR)Fpscr;
3076        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3077                             true, true, VfpRoundNearest);
3078        destReg = (res == 0) ? -1 : 0;
3079        if (res == 2.0)
3080            fpscr.ioc = 1;
3081        Fpscr = fpscr;
3082    '''
3083    twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3084            2, vclefpCode, toInt = True)
3085    twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3086            4, vclefpCode, toInt = True)
3087
# vclt against zero (two-register misc form).
#  - Integer: destElem is mask(sizeof(Element) * 8) when srcElem1 < 0, else 0.
#  - Floating point: vcltFunc is run on srcReg1 and 0.0 through binaryOp;
#    destReg is -1 when the result is 0, else 0, and a result of exactly 2.0
#    sets fpscr.ioc.
# NOTE(review): vcltFunc is defined outside this excerpt; its return
# convention is inferred from the checks below.
3088    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3089    twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3090    twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
3091    vcltfpCode = '''
3092        FPSCR fpscr = (FPSCR)Fpscr;
3093        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3094                             true, true, VfpRoundNearest);
3095        destReg = (res == 0) ? -1 : 0;
3096        if (res == 2.0)
3097            fpscr.ioc = 1;
3098        Fpscr = fpscr;
3099    '''
3100    twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3101            2, vcltfpCode, toInt = True)
3102    twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3103            4, vcltfpCode, toInt = True)
3104
# vswp: exchange the contents of srcReg1 and destReg one register word at a
# time, using a FloatRegBits temporary, for r in [0, rCount).
# Both operands are written, which is why the "scramble" helper is used.
# NOTE(review): rCount is presumably provided by twoRegMiscScramble from the
# 2 / 4 argument — confirm against the helper.
3105    vswpCode = '''
3106        FloatRegBits mid;
3107        for (unsigned r = 0; r < rCount; r++) {
3108            mid = srcReg1.regs[r];
3109            srcReg1.regs[r] = destReg.regs[r];
3110            destReg.regs[r] = mid;
3111        }
3112    '''
3113    twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3114    twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3115
# vtrn: for every even element index i, swap srcReg1.elements[i] with
# destReg.elements[i + 1]; the other elements of both registers stay put.
# Both operands are written, which is why the "scramble" helper is used.
# Registered for unsignedTypes as NVtrnD (count 2) and NVtrnQ (count 4).
3116    vtrnCode = '''
3117        Element mid;
3118        for (unsigned i = 0; i < eCount; i += 2) {
3119            mid = srcReg1.elements[i];
3120            srcReg1.elements[i] = destReg.elements[i + 1];
3121            destReg.elements[i + 1] = mid;
3122        }
3123    '''
3124    twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3125    twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3126
# vuzp: de-interleave the two registers in place.
#  - mid is a snapshot of the original srcReg1 elements.
#  - First loop: the low half of srcReg1 receives the odd-indexed elements of
#    destReg, the high half of srcReg1 receives the odd-indexed elements of
#    the snapshot, and the low half of destReg is compacted to its own
#    even-indexed elements. destReg[2i + 1] and destReg[2i] are read before
#    index i (<= 2i) is overwritten, so no snapshot of destReg is needed.
#  - Second loop: the high half of destReg receives the even-indexed elements
#    of the snapshot.
# Registered for unsignedTypes as NVuzpD (count 2) and NVuzpQ (count 4).
3127    vuzpCode = '''
3128        Element mid[eCount];
3129        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3130        for (unsigned i = 0; i < eCount / 2; i++) {
3131            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3132            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3133            destReg.elements[i] = destReg.elements[2 * i];
3134        }
3135        for (unsigned i = 0; i < eCount / 2; i++) {
3136            destReg.elements[eCount / 2 + i] = mid[2 * i];
3137        }
3138    '''
3139    twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3140    twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3141
3142    vzipCode = '''
3143        Element mid[eCount];
3144        memcpy(&mid, &destReg, sizeof(destReg));
3145        for (unsigned i = 0; i < eCount / 2; i++) {
3146            destReg.elements[2 * i] = mid[i];
3147            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3148        }
3149        for (int i = 0; i < eCount / 2; i++) {
3150            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3151            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3152        }
3153    '''
3154    twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3155    twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3156
3157    vmovnCode = 'destElem = srcElem1;'
        # vmovn: plain element copy. Any narrowing is not visible in the
        # snippet itself; presumably it comes from the element types that
        # twoRegNarrowMiscInst (defined outside this view) gives srcElem1 and
        # destElem -- TODO confirm.
3158    twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3159
3160    vdupCode = 'destElem = srcElem1;'
        # vdup: each destination element is assigned srcElem1. Which source
        # element srcElem1 refers to is decided by twoRegMiscScInst, which is
        # defined outside this view (presumably a single selected scalar --
        # verify). Instantiated as NVdupD and NVdupQ over smallUnsignedTypes.
3161    twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3162    twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3163
3164    def vdupGprInst(name, Name, types, rCount):
            # Generate a vdup instruction class whose source is the operand
            # Op1 rather than a vector register, and append its declaration
            # and execute code to header_output / exec_output.
            #
            #   name   -- mnemonic handed to InstObjParams
            #   Name   -- class name of the generated instruction
            #   types  -- element types; one NeonExecDeclare substitution is
            #             emitted per entry
            #   rCount -- number of FpDestP<n> register words written back
3165        global header_output, exec_output
            # Generated C++: every element of destReg receives Op1 cast to
            # Element (passed through htog()).
3166        eWalkCode = '''
3167        RegVect destReg;
3168        for (unsigned i = 0; i < eCount; i++) {
3169            destReg.elements[i] = htog((Element)Op1);
3170        }
3171        '''
            # Append one write-back statement per destination register word.
3172        for reg in range(rCount):
3173            eWalkCode += '''
3174            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3175            ''' % { "reg" : reg }
            # The instruction derives from RegRegOp; rCount is forwarded to
            # the templates as "r_count".
3176        iop = InstObjParams(name, Name,
3177                            "RegRegOp",
3178                            { "code": eWalkCode,
3179                              "r_count": rCount,
3180                              "predicate_test": predicateTest }, [])
3181        header_output += NeonRegRegOpDeclare.subst(iop)
3182        exec_output += NeonEqualRegExecute.subst(iop)
            # Emit one NeonExecDeclare substitution per element type.
3183        for type in types:
3184            substDict = { "targs" : type,
3185                          "class_name" : Name }
3186            exec_output += NeonExecDeclare.subst(substDict)
        # D form writes back 2 register words, Q form 4.
3187    vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3188    vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3189
3190    vmovCode = 'destElem = imm;'
        # Immediate forms (vmov, vorr, vmvn, vbic). Each snippet combines the
        # immediate `imm` with destElem and is instantiated as a D and a Q
        # class on uint64_t. oneRegImmInst is defined outside this view.
        # vmov: the destination element becomes the immediate.
3191    oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3192    oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3193
        # vorr: ORs the immediate into the existing destination element.
        # NOTE(review): the trailing True is presumably the "read the
        # destination first" flag, since the snippet uses a compound
        # assignment -- confirm against oneRegImmInst.
3194    vorrCode = 'destElem |= imm;'
3195    oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3196    oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3197
        # vmvn: the destination element becomes the bitwise complement of the
        # immediate.
3198    vmvnCode = 'destElem = ~imm;'
3199    oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3200    oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3201
        # vbic: clears in the existing destination element every bit that is
        # set in the immediate (same trailing True as vorr).
3202    vbicCode = 'destElem &= ~imm;'
3203    oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3204    oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3205
3206    vqmovnCode = '''
3207    FPSCR fpscr = (FPSCR)Fpscr;
3208    destElem = srcElem1;
3209    if ((BigElement)destElem != srcElem1) {
3210        fpscr.qc = 1;
3211        destElem = mask(sizeof(Element) * 8 - 1);
3212        if (srcElem1 < 0)
3213            destElem = ~destElem;
3214    }
3215    Fpscr = fpscr;
3216    '''
        # vqmovn (instantiated over smallSignedTypes): the snippet above
        # assigns the source element to the destination and converts the
        # result back to BigElement; if that no longer equals the source, the
        # value did not fit, so fpscr.qc is set and the destination becomes
        # mask(sizeof(Element) * 8 - 1), or the bitwise complement of that
        # when the source is negative.
        # NOTE(review): mask() is defined elsewhere; if it yields n low bits
        # set, these are the largest and smallest signed values -- confirm.
3217    twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3218
3219    vqmovunCode = '''
3220    FPSCR fpscr = (FPSCR)Fpscr;
3221    destElem = srcElem1;
3222    if ((BigElement)destElem != srcElem1) {
3223        fpscr.qc = 1;
3224        destElem = mask(sizeof(Element) * 8);
3225    }
3226    Fpscr = fpscr;
3227    '''
        # vqmovun over smallUnsignedTypes: the snippet above assigns the source
        # element to the destination and converts the result back to
        # BigElement; on a mismatch it sets fpscr.qc and replaces the
        # destination with mask(sizeof(Element) * 8) (presumably the largest
        # value of the destination width -- mask() is defined elsewhere).
3228    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3229            smallUnsignedTypes, vqmovunCode)
3230
3231    vqmovunsCode = '''
3232    FPSCR fpscr = (FPSCR)Fpscr;
3233    destElem = srcElem1;
3234    if (srcElem1 < 0 ||
3235            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3236        fpscr.qc = 1;
3237        destElem = mask(sizeof(Element) * 8);
3238        if (srcElem1 < 0)
3239            destElem = ~destElem;
3240    }
3241    Fpscr = fpscr;
3242    '''
        # Second "vqmovun" class (NVqmovuns), instantiated over
        # smallSignedTypes. The snippet above saturates when the source is
        # negative or when the low sizeof(Element) * 8 bits of the assigned
        # destination differ from the source: fpscr.qc is set and the
        # destination becomes mask(sizeof(Element) * 8), complemented when the
        # source is negative.
        # NOTE(review): with mask() meaning "n low bits set" the negative case
        # would clamp to 0 in the destination width -- confirm.
3243    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3244            smallSignedTypes, vqmovunsCode)
3245
3246    def buildVext(name, Name, types, rCount, op):
            # Generate a three-register-plus-immediate NEON instruction class
            # and append its declaration and execute code to header_output /
            # exec_output.
            #
            #   name   -- mnemonic handed to InstObjParams
            #   Name   -- class name of the generated instruction
            #   types  -- element types; one NeonExecDeclare substitution is
            #             emitted per entry
            #   rCount -- number of register words loaded per source operand
            #             and written back to the destination
            #   op     -- C++ snippet computing destReg from srcReg1/srcReg2
3247        global header_output, exec_output
3248        eWalkCode = '''
3249        RegVect srcReg1, srcReg2, destReg;
3250        '''
            # Load both source operands, one register word at a time.
3251        for reg in range(rCount):
3252            eWalkCode += '''
3253                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3254                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3255            ''' % { "reg" : reg }
            # The caller-supplied operation runs between the loads and the
            # write-back.
3256        eWalkCode += op
3257        for reg in range(rCount):
3258            eWalkCode += '''
3259            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3260            ''' % { "reg" : reg }
            # The instruction derives from RegRegRegImmOp; rCount is forwarded
            # to the templates as "r_count".
3261        iop = InstObjParams(name, Name,
3262                            "RegRegRegImmOp",
3263                            { "code": eWalkCode,
3264                              "r_count": rCount,
3265                              "predicate_test": predicateTest }, [])
3266        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3267        exec_output += NeonEqualRegExecute.subst(iop)
            # Emit one NeonExecDeclare substitution per element type.
3268        for type in types:
3269            substDict = { "targs" : type,
3270                          "class_name" : Name }
3271            exec_output += NeonExecDeclare.subst(substDict)
3272
3273    vextCode = '''
3274        for (unsigned i = 0; i < eCount; i++) {
3275            unsigned index = i + imm;
3276            if (index < eCount) {
3277                destReg.elements[i] = srcReg1.elements[index];
3278            } else {
3279                index -= eCount;
3280                assert(index < eCount);
3281                destReg.elements[i] = srcReg2.elements[index];
3282            }
3283        }
3284    '''
        # vext: destination element i is taken from position i + imm of the
        # sequence formed by srcReg1 followed by srcReg2. Positions below
        # eCount come from srcReg1; the rest come from srcReg2 at
        # (i + imm) - eCount. The assert rejects an immediate that would
        # reach past srcReg2.
        # Instantiated on uint8_t only, as a D form (2 register words) and a
        # Q form (4 register words).
3285    buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3286    buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3287
3288    def buildVtbxl(name, Name, length, isVtbl):
            # Generate a byte table-lookup instruction class (vtbl / vtbx) and
            # append its declaration, constructor and execute code to
            # header_output / decoder_output / exec_output.
            #
            #   name   -- mnemonic handed to InstObjParams
            #   Name   -- class name of the generated instruction
            #   length -- table size; length * 2 register words are loaded
            #             from FpOp1P<n> and indices below 8 * length are
            #             valid
            #   isVtbl -- the text "true" or "false", pasted into the C++ as
            #             the value of a bool; selects what happens to
            #             out-of-range indices (see below)
3289        global header_output, decoder_output, exec_output
            # Generated C++ prologue: byte/word views of the table, of the
            # index operand (srcReg2) and of the destination, plus the loads of
            # the index operand and of the current destination contents.
3290        code = '''
3291            union
3292            {
3293                uint8_t bytes[32];
3294                FloatRegBits regs[8];
3295            } table;
3296
3297            union
3298            {
3299                uint8_t bytes[8];
3300                FloatRegBits regs[2];
3301            } destReg, srcReg2;
3302
3303            const unsigned length = %(length)d;
3304            const bool isVtbl = %(isVtbl)s;
3305
3306            srcReg2.regs[0] = htog(FpOp2P0.uw);
3307            srcReg2.regs[1] = htog(FpOp2P1.uw);
3308
3309            destReg.regs[0] = htog(FpDestP0.uw);
3310            destReg.regs[1] = htog(FpDestP1.uw);
3311        ''' % { "length" : length, "isVtbl" : isVtbl }
            # Fill all 8 table words: the first length * 2 come from
            # FpOp1P<n>, the remainder are zeroed.
3312        for reg in range(8):
3313            if reg < length * 2:
3314                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3315                        { "reg" : reg }
3316            else:
3317                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
            # Generated C++ lookup: each byte of srcReg2 is an index into the
            # table. In-range indices copy the table byte; out-of-range
            # indices zero the destination byte when isVtbl is true and leave
            # it untouched otherwise. The result is then written back.
3318        code += '''
3319        for (unsigned i = 0; i < sizeof(destReg); i++) {
3320            uint8_t index = srcReg2.bytes[i];
3321            if (index < 8 * length) {
3322                destReg.bytes[i] = table.bytes[index];
3323            } else {
3324                if (isVtbl)
3325                    destReg.bytes[i] = 0;
3326                // else destReg.bytes[i] unchanged
3327            }
3328        }
3329
3330        FpDestP0.uw = gtoh(destReg.regs[0]);
3331        FpDestP1.uw = gtoh(destReg.regs[1]);
3332        '''
            # The instruction derives from RegRegRegOp and uses the generic
            # predicated-op execute template.
3333        iop = InstObjParams(name, Name,
3334                            "RegRegRegOp",
3335                            { "code": code,
3336                              "predicate_test": predicateTest }, [])
3337        header_output += RegRegRegOpDeclare.subst(iop)
3338        decoder_output += RegRegRegOpConstructor.subst(iop)
3339        exec_output += PredOpExecute.subst(iop)
3340
        # vtbl with table lengths 1 to 4: out-of-range indices produce 0.
3341    buildVtbxl("vtbl", "NVtbl1", 1, "true")
3342    buildVtbxl("vtbl", "NVtbl2", 2, "true")
3343    buildVtbxl("vtbl", "NVtbl3", 3, "true")
3344    buildVtbxl("vtbl", "NVtbl4", 4, "true")
3345
        # vtbx with table lengths 1 to 4: out-of-range indices leave the
        # destination byte unchanged.
3346    buildVtbxl("vtbx", "NVtbx1", 1, "false")
3347    buildVtbxl("vtbx", "NVtbx2", 2, "false")
3348    buildVtbxl("vtbx", "NVtbx3", 3, "false")
3349    buildVtbxl("vtbx", "NVtbx4", 4, "false")
3350}};
3351