neon.isa revision 7641
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
let {{
    # C++ fragment that the instruction generators in this file place at
    # the start of the execute code they build: it returns disabledFault()
    # when neonEnabled(Cpacr, Cpsr, Fpexc) is false.
    simdEnabledCheckCode = '''
        if (!neonEnabled(Cpacr, Cpsr, Fpexc))
            return disabledFault();
    '''
}};
628
629let {{
630
631    header_output = ""
632    exec_output = ""
633
634    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
635    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
636    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
637    signedTypes = smallSignedTypes + ("int64_t",)
638    smallTypes = smallUnsignedTypes + smallSignedTypes
639    allTypes = unsignedTypes + signedTypes
640
641    def threeEqualRegInst(name, Name, types, rCount, op,
642                          readDest=False, pairwise=False):
643        global header_output, exec_output
644        eWalkCode = simdEnabledCheckCode + '''
645        RegVect srcReg1, srcReg2, destReg;
646        '''
647        for reg in range(rCount):
648            eWalkCode += '''
649                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
650                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
651            ''' % { "reg" : reg }
652            if readDest:
653                eWalkCode += '''
654                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
655                ''' % { "reg" : reg }
656        readDestCode = ''
657        if readDest:
658            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
659        if pairwise:
660            eWalkCode += '''
661            for (unsigned i = 0; i < eCount; i++) {
662                Element srcElem1 = gtoh(2 * i < eCount ?
663                                        srcReg1.elements[2 * i] :
664                                        srcReg2.elements[2 * i - eCount]);
665                Element srcElem2 = gtoh(2 * i < eCount ?
666                                        srcReg1.elements[2 * i + 1] :
667                                        srcReg2.elements[2 * i + 1 - eCount]);
668                Element destElem;
669                %(readDest)s
670                %(op)s
671                destReg.elements[i] = htog(destElem);
672            }
673            ''' % { "op" : op, "readDest" : readDestCode }
674        else:
675            eWalkCode += '''
676            for (unsigned i = 0; i < eCount; i++) {
677                Element srcElem1 = gtoh(srcReg1.elements[i]);
678                Element srcElem2 = gtoh(srcReg2.elements[i]);
679                Element destElem;
680                %(readDest)s
681                %(op)s
682                destReg.elements[i] = htog(destElem);
683            }
684            ''' % { "op" : op, "readDest" : readDestCode }
685        for reg in range(rCount):
686            eWalkCode += '''
687            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
688            ''' % { "reg" : reg }
689        iop = InstObjParams(name, Name,
690                            "RegRegRegOp",
691                            { "code": eWalkCode,
692                              "r_count": rCount,
693                              "predicate_test": predicateTest }, [])
694        header_output += NeonRegRegRegOpDeclare.subst(iop)
695        exec_output += NeonEqualRegExecute.subst(iop)
696        for type in types:
697            substDict = { "targs" : type,
698                          "class_name" : Name }
699            exec_output += NeonExecDeclare.subst(substDict)
700
701    def threeEqualRegInstFp(name, Name, types, rCount, op,
702                            readDest=False, pairwise=False, toInt=False):
703        global header_output, exec_output
704        eWalkCode = simdEnabledCheckCode + '''
705        typedef FloatReg FloatVect[rCount];
706        FloatVect srcRegs1, srcRegs2;
707        '''
708        if toInt:
709            eWalkCode += 'RegVect destRegs;\n'
710        else:
711            eWalkCode += 'FloatVect destRegs;\n'
712        for reg in range(rCount):
713            eWalkCode += '''
714                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
715                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
716            ''' % { "reg" : reg }
717            if readDest:
718                if toInt:
719                    eWalkCode += '''
720                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
721                    ''' % { "reg" : reg }
722                else:
723                    eWalkCode += '''
724                        destRegs[%(reg)d] = FpDestP%(reg)d;
725                    ''' % { "reg" : reg }
726        readDestCode = ''
727        if readDest:
728            readDestCode = 'destReg = destRegs[r];'
729        destType = 'FloatReg'
730        writeDest = 'destRegs[r] = destReg;'
731        if toInt:
732            destType = 'FloatRegBits'
733            writeDest = 'destRegs.regs[r] = destReg;'
734        if pairwise:
735            eWalkCode += '''
736            for (unsigned r = 0; r < rCount; r++) {
737                FloatReg srcReg1 = (2 * r < rCount) ?
738                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
739                FloatReg srcReg2 = (2 * r < rCount) ?
740                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
741                %(destType)s destReg;
742                %(readDest)s
743                %(op)s
744                %(writeDest)s
745            }
746            ''' % { "op" : op,
747                    "readDest" : readDestCode,
748                    "destType" : destType,
749                    "writeDest" : writeDest }
750        else:
751            eWalkCode += '''
752            for (unsigned r = 0; r < rCount; r++) {
753                FloatReg srcReg1 = srcRegs1[r];
754                FloatReg srcReg2 = srcRegs2[r];
755                %(destType)s destReg;
756                %(readDest)s
757                %(op)s
758                %(writeDest)s
759            }
760            ''' % { "op" : op,
761                    "readDest" : readDestCode,
762                    "destType" : destType,
763                    "writeDest" : writeDest }
764        for reg in range(rCount):
765            if toInt:
766                eWalkCode += '''
767                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
768                ''' % { "reg" : reg }
769            else:
770                eWalkCode += '''
771                FpDestP%(reg)d = destRegs[%(reg)d];
772                ''' % { "reg" : reg }
773        iop = InstObjParams(name, Name,
774                            "FpRegRegRegOp",
775                            { "code": eWalkCode,
776                              "r_count": rCount,
777                              "predicate_test": predicateTest }, [])
778        header_output += NeonRegRegRegOpDeclare.subst(iop)
779        exec_output += NeonEqualRegExecute.subst(iop)
780        for type in types:
781            substDict = { "targs" : type,
782                          "class_name" : Name }
783            exec_output += NeonExecDeclare.subst(substDict)
784
785    def threeUnequalRegInst(name, Name, types, op,
786                            bigSrc1, bigSrc2, bigDest, readDest):
787        global header_output, exec_output
788        src1Cnt = src2Cnt = destCnt = 2
789        src1Prefix = src2Prefix = destPrefix = ''
790        if bigSrc1:
791            src1Cnt = 4
792            src1Prefix = 'Big'
793        if bigSrc2:
794            src2Cnt = 4
795            src2Prefix = 'Big'
796        if bigDest:
797            destCnt = 4
798            destPrefix = 'Big'
799        eWalkCode = simdEnabledCheckCode + '''
800            %sRegVect srcReg1;
801            %sRegVect srcReg2;
802            %sRegVect destReg;
803        ''' % (src1Prefix, src2Prefix, destPrefix)
804        for reg in range(src1Cnt):
805            eWalkCode += '''
806                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
807            ''' % { "reg" : reg }
808        for reg in range(src2Cnt):
809            eWalkCode += '''
810                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
811            ''' % { "reg" : reg }
812        if readDest:
813            for reg in range(destCnt):
814                eWalkCode += '''
815                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
816                ''' % { "reg" : reg }
817        readDestCode = ''
818        if readDest:
819            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
820        eWalkCode += '''
821        for (unsigned i = 0; i < eCount; i++) {
822            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
823            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
824            %(destPrefix)sElement destElem;
825            %(readDest)s
826            %(op)s
827            destReg.elements[i] = htog(destElem);
828        }
829        ''' % { "op" : op, "readDest" : readDestCode,
830                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
831                "destPrefix" : destPrefix }
832        for reg in range(destCnt):
833            eWalkCode += '''
834            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
835            ''' % { "reg" : reg }
836        iop = InstObjParams(name, Name,
837                            "RegRegRegOp",
838                            { "code": eWalkCode,
839                              "r_count": 2,
840                              "predicate_test": predicateTest }, [])
841        header_output += NeonRegRegRegOpDeclare.subst(iop)
842        exec_output += NeonUnequalRegExecute.subst(iop)
843        for type in types:
844            substDict = { "targs" : type,
845                          "class_name" : Name }
846            exec_output += NeonExecDeclare.subst(substDict)
847
848    def threeRegNarrowInst(name, Name, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, types, op,
850                            True, True, False, readDest)
851
852    def threeRegLongInst(name, Name, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, types, op,
854                            False, False, True, readDest)
855
856    def threeRegWideInst(name, Name, types, op, readDest=False):
857        threeUnequalRegInst(name, Name, types, op,
858                            True, False, True, readDest)
859
860    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
861        global header_output, exec_output
862        eWalkCode = simdEnabledCheckCode + '''
863        RegVect srcReg1, srcReg2, destReg;
864        '''
865        for reg in range(rCount):
866            eWalkCode += '''
867                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
868                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
869            ''' % { "reg" : reg }
870            if readDest:
871                eWalkCode += '''
872                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
873                ''' % { "reg" : reg }
874        readDestCode = ''
875        if readDest:
876            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
877        eWalkCode += '''
878        assert(imm >= 0 && imm < eCount);
879        for (unsigned i = 0; i < eCount; i++) {
880            Element srcElem1 = gtoh(srcReg1.elements[i]);
881            Element srcElem2 = gtoh(srcReg2.elements[imm]);
882            Element destElem;
883            %(readDest)s
884            %(op)s
885            destReg.elements[i] = htog(destElem);
886        }
887        ''' % { "op" : op, "readDest" : readDestCode }
888        for reg in range(rCount):
889            eWalkCode += '''
890            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
891            ''' % { "reg" : reg }
892        iop = InstObjParams(name, Name,
893                            "RegRegRegImmOp",
894                            { "code": eWalkCode,
895                              "r_count": rCount,
896                              "predicate_test": predicateTest }, [])
897        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
898        exec_output += NeonEqualRegExecute.subst(iop)
899        for type in types:
900            substDict = { "targs" : type,
901                          "class_name" : Name }
902            exec_output += NeonExecDeclare.subst(substDict)
903
904    def twoRegLongInst(name, Name, types, op, readDest=False):
905        global header_output, exec_output
906        rCount = 2
907        eWalkCode = simdEnabledCheckCode + '''
908        RegVect srcReg1, srcReg2;
909        BigRegVect destReg;
910        '''
911        for reg in range(rCount):
912            eWalkCode += '''
913                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
914                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
915            ''' % { "reg" : reg }
916        if readDest:
917            for reg in range(2 * rCount):
918                eWalkCode += '''
919                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
920                ''' % { "reg" : reg }
921        readDestCode = ''
922        if readDest:
923            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
924        eWalkCode += '''
925        assert(imm >= 0 && imm < eCount);
926        for (unsigned i = 0; i < eCount; i++) {
927            Element srcElem1 = gtoh(srcReg1.elements[i]);
928            Element srcElem2 = gtoh(srcReg2.elements[imm]);
929            BigElement destElem;
930            %(readDest)s
931            %(op)s
932            destReg.elements[i] = htog(destElem);
933        }
934        ''' % { "op" : op, "readDest" : readDestCode }
935        for reg in range(2 * rCount):
936            eWalkCode += '''
937            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
938            ''' % { "reg" : reg }
939        iop = InstObjParams(name, Name,
940                            "RegRegRegImmOp",
941                            { "code": eWalkCode,
942                              "r_count": rCount,
943                              "predicate_test": predicateTest }, [])
944        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
945        exec_output += NeonUnequalRegExecute.subst(iop)
946        for type in types:
947            substDict = { "targs" : type,
948                          "class_name" : Name }
949            exec_output += NeonExecDeclare.subst(substDict)
950
951    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
952        global header_output, exec_output
953        eWalkCode = simdEnabledCheckCode + '''
954        typedef FloatReg FloatVect[rCount];
955        FloatVect srcRegs1, srcRegs2, destRegs;
956        '''
957        for reg in range(rCount):
958            eWalkCode += '''
959                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
960                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
961            ''' % { "reg" : reg }
962            if readDest:
963                eWalkCode += '''
964                    destRegs[%(reg)d] = FpDestP%(reg)d;
965                ''' % { "reg" : reg }
966        readDestCode = ''
967        if readDest:
968            readDestCode = 'destReg = destRegs[i];'
969        eWalkCode += '''
970        assert(imm >= 0 && imm < rCount);
971        for (unsigned i = 0; i < rCount; i++) {
972            FloatReg srcReg1 = srcRegs1[i];
973            FloatReg srcReg2 = srcRegs2[imm];
974            FloatReg destReg;
975            %(readDest)s
976            %(op)s
977            destRegs[i] = destReg;
978        }
979        ''' % { "op" : op, "readDest" : readDestCode }
980        for reg in range(rCount):
981            eWalkCode += '''
982            FpDestP%(reg)d = destRegs[%(reg)d];
983            ''' % { "reg" : reg }
984        iop = InstObjParams(name, Name,
985                            "FpRegRegRegImmOp",
986                            { "code": eWalkCode,
987                              "r_count": rCount,
988                              "predicate_test": predicateTest }, [])
989        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
990        exec_output += NeonEqualRegExecute.subst(iop)
991        for type in types:
992            substDict = { "targs" : type,
993                          "class_name" : Name }
994            exec_output += NeonExecDeclare.subst(substDict)
995
996    def twoRegShiftInst(name, Name, types, rCount, op,
997            readDest=False, toInt=False, fromInt=False):
998        global header_output, exec_output
999        eWalkCode = simdEnabledCheckCode + '''
1000        RegVect srcRegs1, destRegs;
1001        '''
1002        for reg in range(rCount):
1003            eWalkCode += '''
1004                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1005            ''' % { "reg" : reg }
1006            if readDest:
1007                eWalkCode += '''
1008                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1009                ''' % { "reg" : reg }
1010        readDestCode = ''
1011        if readDest:
1012            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1013            if toInt:
1014                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1015        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1016        if fromInt:
1017            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1018        declDest = 'Element destElem;'
1019        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1020        if toInt:
1021            declDest = 'FloatRegBits destReg;'
1022            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1023        eWalkCode += '''
1024        for (unsigned i = 0; i < eCount; i++) {
1025            %(readOp)s
1026            %(declDest)s
1027            %(readDest)s
1028            %(op)s
1029            %(writeDest)s
1030        }
1031        ''' % { "readOp" : readOpCode,
1032                "declDest" : declDest,
1033                "readDest" : readDestCode,
1034                "op" : op,
1035                "writeDest" : writeDestCode }
1036        for reg in range(rCount):
1037            eWalkCode += '''
1038            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1039            ''' % { "reg" : reg }
1040        iop = InstObjParams(name, Name,
1041                            "RegRegImmOp",
1042                            { "code": eWalkCode,
1043                              "r_count": rCount,
1044                              "predicate_test": predicateTest }, [])
1045        header_output += NeonRegRegImmOpDeclare.subst(iop)
1046        exec_output += NeonEqualRegExecute.subst(iop)
1047        for type in types:
1048            substDict = { "targs" : type,
1049                          "class_name" : Name }
1050            exec_output += NeonExecDeclare.subst(substDict)
1051
1052    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1053        global header_output, exec_output
1054        eWalkCode = simdEnabledCheckCode + '''
1055        BigRegVect srcReg1;
1056        RegVect destReg;
1057        '''
1058        for reg in range(4):
1059            eWalkCode += '''
1060                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1061            ''' % { "reg" : reg }
1062        if readDest:
1063            for reg in range(2):
1064                eWalkCode += '''
1065                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1066                ''' % { "reg" : reg }
1067        readDestCode = ''
1068        if readDest:
1069            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1070        eWalkCode += '''
1071        for (unsigned i = 0; i < eCount; i++) {
1072            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1073            Element destElem;
1074            %(readDest)s
1075            %(op)s
1076            destReg.elements[i] = htog(destElem);
1077        }
1078        ''' % { "op" : op, "readDest" : readDestCode }
1079        for reg in range(2):
1080            eWalkCode += '''
1081            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1082            ''' % { "reg" : reg }
1083        iop = InstObjParams(name, Name,
1084                            "RegRegImmOp",
1085                            { "code": eWalkCode,
1086                              "r_count": 2,
1087                              "predicate_test": predicateTest }, [])
1088        header_output += NeonRegRegImmOpDeclare.subst(iop)
1089        exec_output += NeonUnequalRegExecute.subst(iop)
1090        for type in types:
1091            substDict = { "targs" : type,
1092                          "class_name" : Name }
1093            exec_output += NeonExecDeclare.subst(substDict)
1094
1095    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1096        global header_output, exec_output
1097        eWalkCode = simdEnabledCheckCode + '''
1098        RegVect srcReg1;
1099        BigRegVect destReg;
1100        '''
1101        for reg in range(2):
1102            eWalkCode += '''
1103                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1104            ''' % { "reg" : reg }
1105        if readDest:
1106            for reg in range(4):
1107                eWalkCode += '''
1108                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1109                ''' % { "reg" : reg }
1110        readDestCode = ''
1111        if readDest:
1112            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1113        eWalkCode += '''
1114        for (unsigned i = 0; i < eCount; i++) {
1115            Element srcElem1 = gtoh(srcReg1.elements[i]);
1116            BigElement destElem;
1117            %(readDest)s
1118            %(op)s
1119            destReg.elements[i] = htog(destElem);
1120        }
1121        ''' % { "op" : op, "readDest" : readDestCode }
1122        for reg in range(4):
1123            eWalkCode += '''
1124            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1125            ''' % { "reg" : reg }
1126        iop = InstObjParams(name, Name,
1127                            "RegRegImmOp",
1128                            { "code": eWalkCode,
1129                              "r_count": 2,
1130                              "predicate_test": predicateTest }, [])
1131        header_output += NeonRegRegImmOpDeclare.subst(iop)
1132        exec_output += NeonUnequalRegExecute.subst(iop)
1133        for type in types:
1134            substDict = { "targs" : type,
1135                          "class_name" : Name }
1136            exec_output += NeonExecDeclare.subst(substDict)
1137
1138    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1139        global header_output, exec_output
1140        eWalkCode = simdEnabledCheckCode + '''
1141        RegVect srcReg1, destReg;
1142        '''
1143        for reg in range(rCount):
1144            eWalkCode += '''
1145                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1146            ''' % { "reg" : reg }
1147            if readDest:
1148                eWalkCode += '''
1149                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1150                ''' % { "reg" : reg }
1151        readDestCode = ''
1152        if readDest:
1153            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1154        eWalkCode += '''
1155        for (unsigned i = 0; i < eCount; i++) {
1156            unsigned j = i;
1157            Element srcElem1 = gtoh(srcReg1.elements[i]);
1158            Element destElem;
1159            %(readDest)s
1160            %(op)s
1161            destReg.elements[j] = htog(destElem);
1162        }
1163        ''' % { "op" : op, "readDest" : readDestCode }
1164        for reg in range(rCount):
1165            eWalkCode += '''
1166            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1167            ''' % { "reg" : reg }
1168        iop = InstObjParams(name, Name,
1169                            "RegRegOp",
1170                            { "code": eWalkCode,
1171                              "r_count": rCount,
1172                              "predicate_test": predicateTest }, [])
1173        header_output += NeonRegRegOpDeclare.subst(iop)
1174        exec_output += NeonEqualRegExecute.subst(iop)
1175        for type in types:
1176            substDict = { "targs" : type,
1177                          "class_name" : Name }
1178            exec_output += NeonExecDeclare.subst(substDict)
1179
1180    def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1181        global header_output, exec_output
1182        eWalkCode = simdEnabledCheckCode + '''
1183        RegVect srcReg1, destReg;
1184        '''
1185        for reg in range(rCount):
1186            eWalkCode += '''
1187                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1188            ''' % { "reg" : reg }
1189            if readDest:
1190                eWalkCode += '''
1191                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1192                ''' % { "reg" : reg }
1193        readDestCode = ''
1194        if readDest:
1195            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1196        eWalkCode += '''
1197        for (unsigned i = 0; i < eCount; i++) {
1198            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1199            Element destElem;
1200            %(readDest)s
1201            %(op)s
1202            destReg.elements[i] = htog(destElem);
1203        }
1204        ''' % { "op" : op, "readDest" : readDestCode }
1205        for reg in range(rCount):
1206            eWalkCode += '''
1207            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1208            ''' % { "reg" : reg }
1209        iop = InstObjParams(name, Name,
1210                            "RegRegImmOp",
1211                            { "code": eWalkCode,
1212                              "r_count": rCount,
1213                              "predicate_test": predicateTest }, [])
1214        header_output += NeonRegRegImmOpDeclare.subst(iop)
1215        exec_output += NeonEqualRegExecute.subst(iop)
1216        for type in types:
1217            substDict = { "targs" : type,
1218                          "class_name" : Name }
1219            exec_output += NeonExecDeclare.subst(substDict)
1220
1221    def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1222        global header_output, exec_output
1223        eWalkCode = simdEnabledCheckCode + '''
1224        RegVect srcReg1, destReg;
1225        '''
1226        for reg in range(rCount):
1227            eWalkCode += '''
1228                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1229                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1230            ''' % { "reg" : reg }
1231            if readDest:
1232                eWalkCode += '''
1233                ''' % { "reg" : reg }
1234        readDestCode = ''
1235        if readDest:
1236            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1237        eWalkCode += op
1238        for reg in range(rCount):
1239            eWalkCode += '''
1240            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1241            FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1242            ''' % { "reg" : reg }
1243        iop = InstObjParams(name, Name,
1244                            "RegRegOp",
1245                            { "code": eWalkCode,
1246                              "r_count": rCount,
1247                              "predicate_test": predicateTest }, [])
1248        header_output += NeonRegRegOpDeclare.subst(iop)
1249        exec_output += NeonEqualRegExecute.subst(iop)
1250        for type in types:
1251            substDict = { "targs" : type,
1252                          "class_name" : Name }
1253            exec_output += NeonExecDeclare.subst(substDict)
1254
1255    def twoRegMiscInstFp(name, Name, types, rCount, op,
1256            readDest=False, toInt=False):
1257        global header_output, exec_output
1258        eWalkCode = simdEnabledCheckCode + '''
1259        typedef FloatReg FloatVect[rCount];
1260        FloatVect srcRegs1;
1261        '''
1262        if toInt:
1263            eWalkCode += 'RegVect destRegs;\n'
1264        else:
1265            eWalkCode += 'FloatVect destRegs;\n'
1266        for reg in range(rCount):
1267            eWalkCode += '''
1268                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1269            ''' % { "reg" : reg }
1270            if readDest:
1271                if toInt:
1272                    eWalkCode += '''
1273                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1274                    ''' % { "reg" : reg }
1275                else:
1276                    eWalkCode += '''
1277                        destRegs[%(reg)d] = FpDestP%(reg)d;
1278                    ''' % { "reg" : reg }
1279        readDestCode = ''
1280        if readDest:
1281            readDestCode = 'destReg = destRegs[i];'
1282        destType = 'FloatReg'
1283        writeDest = 'destRegs[r] = destReg;'
1284        if toInt:
1285            destType = 'FloatRegBits'
1286            writeDest = 'destRegs.regs[r] = destReg;'
1287        eWalkCode += '''
1288        for (unsigned r = 0; r < rCount; r++) {
1289            FloatReg srcReg1 = srcRegs1[r];
1290            %(destType)s destReg;
1291            %(readDest)s
1292            %(op)s
1293            %(writeDest)s
1294        }
1295        ''' % { "op" : op,
1296                "readDest" : readDestCode,
1297                "destType" : destType,
1298                "writeDest" : writeDest }
1299        for reg in range(rCount):
1300            if toInt:
1301                eWalkCode += '''
1302                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1303                ''' % { "reg" : reg }
1304            else:
1305                eWalkCode += '''
1306                FpDestP%(reg)d = destRegs[%(reg)d];
1307                ''' % { "reg" : reg }
1308        iop = InstObjParams(name, Name,
1309                            "FpRegRegOp",
1310                            { "code": eWalkCode,
1311                              "r_count": rCount,
1312                              "predicate_test": predicateTest }, [])
1313        header_output += NeonRegRegOpDeclare.subst(iop)
1314        exec_output += NeonEqualRegExecute.subst(iop)
1315        for type in types:
1316            substDict = { "targs" : type,
1317                          "class_name" : Name }
1318            exec_output += NeonExecDeclare.subst(substDict)
1319
1320    def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1321        global header_output, exec_output
1322        eWalkCode = simdEnabledCheckCode + '''
1323        RegVect srcRegs;
1324        BigRegVect destReg;
1325        '''
1326        for reg in range(rCount):
1327            eWalkCode += '''
1328                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1329            ''' % { "reg" : reg }
1330            if readDest:
1331                eWalkCode += '''
1332                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1333                ''' % { "reg" : reg }
1334        readDestCode = ''
1335        if readDest:
1336            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1337        eWalkCode += '''
1338        for (unsigned i = 0; i < eCount / 2; i++) {
1339            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1340            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1341            BigElement destElem;
1342            %(readDest)s
1343            %(op)s
1344            destReg.elements[i] = htog(destElem);
1345        }
1346        ''' % { "op" : op, "readDest" : readDestCode }
1347        for reg in range(rCount):
1348            eWalkCode += '''
1349            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1350            ''' % { "reg" : reg }
1351        iop = InstObjParams(name, Name,
1352                            "RegRegOp",
1353                            { "code": eWalkCode,
1354                              "r_count": rCount,
1355                              "predicate_test": predicateTest }, [])
1356        header_output += NeonRegRegOpDeclare.subst(iop)
1357        exec_output += NeonUnequalRegExecute.subst(iop)
1358        for type in types:
1359            substDict = { "targs" : type,
1360                          "class_name" : Name }
1361            exec_output += NeonExecDeclare.subst(substDict)
1362
1363    def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1364        global header_output, exec_output
1365        eWalkCode = simdEnabledCheckCode + '''
1366        BigRegVect srcReg1;
1367        RegVect destReg;
1368        '''
1369        for reg in range(4):
1370            eWalkCode += '''
1371                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1372            ''' % { "reg" : reg }
1373        if readDest:
1374            for reg in range(2):
1375                eWalkCode += '''
1376                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1377                ''' % { "reg" : reg }
1378        readDestCode = ''
1379        if readDest:
1380            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1381        eWalkCode += '''
1382        for (unsigned i = 0; i < eCount; i++) {
1383            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1384            Element destElem;
1385            %(readDest)s
1386            %(op)s
1387            destReg.elements[i] = htog(destElem);
1388        }
1389        ''' % { "op" : op, "readDest" : readDestCode }
1390        for reg in range(2):
1391            eWalkCode += '''
1392            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1393            ''' % { "reg" : reg }
1394        iop = InstObjParams(name, Name,
1395                            "RegRegOp",
1396                            { "code": eWalkCode,
1397                              "r_count": 2,
1398                              "predicate_test": predicateTest }, [])
1399        header_output += NeonRegRegOpDeclare.subst(iop)
1400        exec_output += NeonUnequalRegExecute.subst(iop)
1401        for type in types:
1402            substDict = { "targs" : type,
1403                          "class_name" : Name }
1404            exec_output += NeonExecDeclare.subst(substDict)
1405
1406    def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1407        global header_output, exec_output
1408        eWalkCode = simdEnabledCheckCode + '''
1409        RegVect destReg;
1410        '''
1411        if readDest:
1412            for reg in range(rCount):
1413                eWalkCode += '''
1414                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1415                ''' % { "reg" : reg }
1416        readDestCode = ''
1417        if readDest:
1418            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1419        eWalkCode += '''
1420        for (unsigned i = 0; i < eCount; i++) {
1421            Element destElem;
1422            %(readDest)s
1423            %(op)s
1424            destReg.elements[i] = htog(destElem);
1425        }
1426        ''' % { "op" : op, "readDest" : readDestCode }
1427        for reg in range(rCount):
1428            eWalkCode += '''
1429            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1430            ''' % { "reg" : reg }
1431        iop = InstObjParams(name, Name,
1432                            "RegImmOp",
1433                            { "code": eWalkCode,
1434                              "r_count": rCount,
1435                              "predicate_test": predicateTest }, [])
1436        header_output += NeonRegImmOpDeclare.subst(iop)
1437        exec_output += NeonEqualRegExecute.subst(iop)
1438        for type in types:
1439            substDict = { "targs" : type,
1440                          "class_name" : Name }
1441            exec_output += NeonExecDeclare.subst(substDict)
1442
1443    def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1444        global header_output, exec_output
1445        eWalkCode = simdEnabledCheckCode + '''
1446        RegVect srcReg1;
1447        BigRegVect destReg;
1448        '''
1449        for reg in range(2):
1450            eWalkCode += '''
1451                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1452            ''' % { "reg" : reg }
1453        if readDest:
1454            for reg in range(4):
1455                eWalkCode += '''
1456                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1457                ''' % { "reg" : reg }
1458        readDestCode = ''
1459        if readDest:
1460            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1461        eWalkCode += '''
1462        for (unsigned i = 0; i < eCount; i++) {
1463            Element srcElem1 = gtoh(srcReg1.elements[i]);
1464            BigElement destElem;
1465            %(readDest)s
1466            %(op)s
1467            destReg.elements[i] = htog(destElem);
1468        }
1469        ''' % { "op" : op, "readDest" : readDestCode }
1470        for reg in range(4):
1471            eWalkCode += '''
1472            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1473            ''' % { "reg" : reg }
1474        iop = InstObjParams(name, Name,
1475                            "RegRegOp",
1476                            { "code": eWalkCode,
1477                              "r_count": 2,
1478                              "predicate_test": predicateTest }, [])
1479        header_output += NeonRegRegOpDeclare.subst(iop)
1480        exec_output += NeonUnequalRegExecute.subst(iop)
1481        for type in types:
1482            substDict = { "targs" : type,
1483                          "class_name" : Name }
1484            exec_output += NeonExecDeclare.subst(substDict)
1485
    # vhadd: per-element snippet computing (srcElem1 + srcElem2) / 2.  Each
    # operand is halved before the addition, and carryBit adds back the
    # carry produced by the two low bits that the halving discarded.
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    # Two classes are registered from the same snippet.  The numeric
    # argument is presumably the register count (2 for the "D" class, 4 for
    # the "Q" class); threeEqualRegInst is defined outside this section.
    threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1498
    # vrhadd: same as the vhadd snippet except that 1 is added to the sum
    # of the low bits before shifting, so the halved result is rounded up
    # instead of truncated.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1511
    # vhsub: per-element snippet computing (srcElem1 - srcElem2) / 2.  Each
    # operand is halved before the subtraction; barrowBit (i.e. the borrow)
    # is 1 only when srcElem1's low bit is 0 and srcElem2's low bit is 1,
    # and is subtracted from the difference of the halves.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1523
    # Bitwise logical operations.  Each snippet is a single C++ expression
    # on srcElem1 and srcElem2, registered with unsignedTypes in a "D" and
    # a "Q" class.

    # vand: bitwise AND.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)

    # vbic: AND of the first operand with the complement of the second.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)

    # vorr: bitwise OR.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)

    # vmov reuses the OR snippet: OR-ing a value with itself yields that
    # value (presumably the decoder selects these classes when both source
    # registers are the same -- confirm against the decoder).
    threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)

    # vorn: OR of the first operand with the complement of the second.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)

    # veor: bitwise exclusive OR.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1556
    # Bitwise select operations.  These snippets read the previous value of
    # destElem, so the trailing True argument is presumably readDest
    # (threeEqualRegInst is defined outside this section).

    # vbif: keep destElem's bits where srcElem2 is 1, take srcElem1's bits
    # where srcElem2 is 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
    # vbit: take srcElem1's bits where srcElem2 is 1, keep destElem's bits
    # where srcElem2 is 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destElem is the selector; take srcElem1's bits where it
    # is 1 and srcElem2's bits where it is 0.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1572
        # vmax: the larger of the two source elements (srcElem2 on a tie).
        # This snippet is reused later in the file for the pairwise vpmax
        # registrations.
1573    vmaxCode = '''
1574        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1575    '''
1576    threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1577    threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1578
        # vmin: the smaller of the two source elements (srcElem2 on a tie).
        # Reused later in the file for the pairwise vpmin registrations.
1579    vminCode = '''
1580        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1581    '''
1582    threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1583    threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1584
        # vadd: element-wise addition in the element's own width.
1585    vaddCode = '''
1586        destElem = srcElem1 + srcElem2;
1587    '''
1588    threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1589    threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1590
        # vpadd reuses the same addition; pairwise=True is handled by the
        # helper (defined outside this chunk).
1591    threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1592                      2, vaddCode, pairwise=True)
1593    threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1594                      4, vaddCode, pairwise=True)
        # vaddl / vaddw: both operands are cast to BigElement before adding,
        # so the sum is formed in the wider type.  One snippet serves both the
        # "long" and the "wide" helper.
1595    vaddlwCode = '''
1596        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1597    '''
1598    threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1599    threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
        # vaddhn: add in BigElement, then shift right by the bit width of
        # Element so only the upper part of the sum is kept.
1600    vaddhnCode = '''
1601        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1602                   (sizeof(Element) * 8);
1603    '''
1604    threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
        # vraddhn: as vaddhn, but 1 << (Element bits - 1) is added before the
        # shift, i.e. half of the discarded range, so the result is rounded
        # rather than truncated.
1605    vraddhnCode = '''
1606        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1607                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1608                   (sizeof(Element) * 8);
1609    '''
1610    threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1611
        # vsub: element-wise subtraction in the element's own width.
1612    vsubCode = '''
1613        destElem = srcElem1 - srcElem2;
1614    '''
1615    threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1616    threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
        # vsubl / vsubw: both operands are cast to BigElement first so the
        # difference is formed in the wider type; shared by the "long" and
        # "wide" helpers.
1617    vsublwCode = '''
1618        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1619    '''
1620    threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1621    threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1622
        # vqadd (unsigned): saturating add.  The sum is formed in Element; if
        # it is smaller than either operand the addition wrapped, so the
        # result is replaced by (Element)(-1) (all ones) and the qc bit of the
        # local FPSCR copy is set.  The FPSCR copy is written back
        # unconditionally at the end.
1623    vqaddUCode = '''
1624        destElem = srcElem1 + srcElem2;
1625        FPSCR fpscr = (FPSCR)Fpscr;
1626        if (destElem < srcElem1 || destElem < srcElem2) {
1627            destElem = (Element)(-1);
1628            fpscr.qc = 1;
1629        }
1630        Fpscr = fpscr;
1631    '''
1632    threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1633    threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
        # vsubhn: subtract in BigElement, then shift right by the bit width of
        # Element so only the upper part of the difference is kept.
1634    vsubhnCode = '''
1635        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1636                   (sizeof(Element) * 8);
1637    '''
1638    threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
        # vrsubhn: as vsubhn, with the rounding constant
        # 1 << (Element bits - 1) added before the shift.
1639    vrsubhnCode = '''
1640        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1641                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1642                   (sizeof(Element) * 8);
1643    '''
1644    threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1645
        # vqadd (signed): saturating add.  Overflow is detected from signs
        # only: both sources have the same sign and the wrapped sum has the
        # opposite one.  The replacement starts as 1 << (bits - 1), the most
        # negative bit pattern; when the wrapped sum was negative (two
        # positive inputs overflowed) one is subtracted from it, which yields
        # the largest positive value.  qc is set whenever saturation happens.
1646    vqaddSCode = '''
1647        destElem = srcElem1 + srcElem2;
1648        FPSCR fpscr = (FPSCR)Fpscr;
1649        bool negDest = (destElem < 0);
1650        bool negSrc1 = (srcElem1 < 0);
1651        bool negSrc2 = (srcElem2 < 0);
1652        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1653            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1654            if (negDest)
1655                destElem -= 1;
1656            fpscr.qc = 1;
1657        }
1658        Fpscr = fpscr;
1659    '''
1660    threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1661    threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1662
        # vqsub (unsigned): saturating subtract.  If the difference is larger
        # than srcElem1 the subtraction wrapped below zero, so the result is
        # clamped to 0 and qc is set in the local FPSCR copy, which is written
        # back unconditionally.
1663    vqsubUCode = '''
1664        destElem = srcElem1 - srcElem2;
1665        FPSCR fpscr = (FPSCR)Fpscr;
1666        if (destElem > srcElem1) {
1667            destElem = 0;
1668            fpscr.qc = 1;
1669        }
1670        Fpscr = fpscr;
1671    '''
1672    threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1673    threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1674
        # vqsub (signed): saturating subtract.  Overflow needs sources of
        # opposite sign (negSrc1 == posSrc2 covers "negative minus
        # non-negative" and "non-negative minus negative") and a wrapped
        # result whose sign differs from srcElem1.  The replacement value is
        # built as in the signed vqadd snippet: 1 << (bits - 1), minus one
        # when the wrapped result was negative.
1675    vqsubSCode = '''
1676        destElem = srcElem1 - srcElem2;
1677        FPSCR fpscr = (FPSCR)Fpscr;
1678        bool negDest = (destElem < 0);
1679        bool negSrc1 = (srcElem1 < 0);
1680        bool posSrc2 = (srcElem2 >= 0);
1681        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1682            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1683            if (negDest)
1684                destElem -= 1;
1685            fpscr.qc = 1;
1686        }
1687        Fpscr = fpscr;
1688    '''
1689    threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1690    threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1691
        # Integer compares.  Each writes (Element)(-1), i.e. all ones, when
        # the comparison holds and 0 otherwise.
        # vcgt: srcElem1 > srcElem2.
1692    vcgtCode = '''
1693        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1694    '''
1695    threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1696    threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1697
        # vcge: srcElem1 >= srcElem2.
1698    vcgeCode = '''
1699        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1700    '''
1701    threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1702    threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1703
        # vceq: srcElem1 == srcElem2.  Registered for unsignedTypes only,
        # unlike the ordered compares above which use allTypes.
1704    vceqCode = '''
1705        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1706    '''
1707    threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1708    threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1709
        # vshl: shift by a per-element register amount.  The amount is
        # srcElem2 truncated to int8_t, so it is signed: a negative amount
        # means "shift right by its magnitude".
        #  - Right shift of at least the element width: the result starts as 0
        #    and shiftAmt is clamped to width - 1 so the fix-up below fills
        #    the whole element.
        #  - Sign fix-up: if ltz(srcElem1) is true but the shifted value is
        #    not negative, the upper bits are ORed with ones so the shift
        #    behaves arithmetically.  ltz is defined outside this chunk;
        #    presumably a "less than zero" test that is false for unsigned
        #    element types -- confirm.
        #  - Left shift of at least the element width gives 0.
1710    vshlCode = '''
1711        int16_t shiftAmt = (int8_t)srcElem2;
1712        if (shiftAmt < 0) {
1713            shiftAmt = -shiftAmt;
1714            if (shiftAmt >= sizeof(Element) * 8) {
1715                shiftAmt = sizeof(Element) * 8 - 1;
1716                destElem = 0;
1717            } else {
1718                destElem = (srcElem1 >> shiftAmt);
1719            }
1720            // Make sure the right shift sign extended when it should.
1721            if (ltz(srcElem1) && !ltz(destElem)) {
1722                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1723                                             1 - shiftAmt));
1724            }
1725        } else {
1726            if (shiftAmt >= sizeof(Element) * 8) {
1727                destElem = 0;
1728            } else {
1729                destElem = srcElem1 << shiftAmt;
1730            }
1731        }
1732    '''
1733    threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1734    threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1735
        # vrshl: rounding variant of the register shift.  The shift amount is
        # again srcElem2 truncated to a signed byte.  For right shifts the
        # snippet captures rBit, the last bit that will be shifted out
        # (bit shiftAmt - 1 of srcElem1), and adds it to the shifted value.
        # When the shift exceeds the element width and ltz(srcElem1) is true,
        # rBit is forced to 1.  The right-shift clamp and sign fix-up follow
        # the same pattern as the plain vshl snippet.  A zero amount copies
        # srcElem1 unchanged; left shifts of at least the width give 0.
        # bits and ltz are helpers defined outside this chunk.
1736    vrshlCode = '''
1737        int16_t shiftAmt = (int8_t)srcElem2;
1738        if (shiftAmt < 0) {
1739            shiftAmt = -shiftAmt;
1740            Element rBit = 0;
1741            if (shiftAmt <= sizeof(Element) * 8)
1742                rBit = bits(srcElem1, shiftAmt - 1);
1743            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1744                rBit = 1;
1745            if (shiftAmt >= sizeof(Element) * 8) {
1746                shiftAmt = sizeof(Element) * 8 - 1;
1747                destElem = 0;
1748            } else {
1749                destElem = (srcElem1 >> shiftAmt);
1750            }
1751            // Make sure the right shift sign extended when it should.
1752            if (ltz(srcElem1) && !ltz(destElem)) {
1753                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1754                                             1 - shiftAmt));
1755            }
1756            destElem += rBit;
1757        } else if (shiftAmt > 0) {
1758            if (shiftAmt >= sizeof(Element) * 8) {
1759                destElem = 0;
1760            } else {
1761                destElem = srcElem1 << shiftAmt;
1762            }
1763        } else {
1764            destElem = srcElem1;
1765        }
1766    '''
1767    threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1768    threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1769
        # vqshl (unsigned): saturating register shift.  Negative amounts shift
        # right with no sign fix-up (the clamped shiftAmt assigned in the
        # ">= width" case is not used afterwards).  Left shifts saturate to
        # mask(element bits) and set qc when information would be lost:
        # either the shift is at least the element width and the source is
        # non-zero, or any of the top shiftAmt bits of the source is set.  A
        # zero amount copies srcElem1.  mask and bits are helpers defined
        # outside this chunk; mask(n) presumably yields n low one-bits --
        # confirm.
1770    vqshlUCode = '''
1771        int16_t shiftAmt = (int8_t)srcElem2;
1772        FPSCR fpscr = (FPSCR)Fpscr;
1773        if (shiftAmt < 0) {
1774            shiftAmt = -shiftAmt;
1775            if (shiftAmt >= sizeof(Element) * 8) {
1776                shiftAmt = sizeof(Element) * 8 - 1;
1777                destElem = 0;
1778            } else {
1779                destElem = (srcElem1 >> shiftAmt);
1780            }
1781        } else if (shiftAmt > 0) {
1782            if (shiftAmt >= sizeof(Element) * 8) {
1783                if (srcElem1 != 0) {
1784                    destElem = mask(sizeof(Element) * 8);
1785                    fpscr.qc = 1;
1786                } else {
1787                    destElem = 0;
1788                }
1789            } else {
1790                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1791                            sizeof(Element) * 8 - shiftAmt)) {
1792                    destElem = mask(sizeof(Element) * 8);
1793                    fpscr.qc = 1;
1794                } else {
1795                    destElem = srcElem1 << shiftAmt;
1796                }
1797            }
1798        } else {
1799            destElem = srcElem1;
1800        }
1801        Fpscr = fpscr;
1802    '''
1803    threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1804    threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1805
        # vqshl (signed): saturating register shift.  Right shifts follow the
        # plain vshl pattern (clamp to width - 1, then OR in ones when the
        # source is negative but the shifted value is not).  Left shifts set
        # "sat" when the shift is at least the element width and the source is
        # non-zero, or when the top shiftAmt + 1 bits of the source are not
        # all copies of the sign (compared against mask(shiftAmt + 1) for a
        # negative source, 0 otherwise).  On saturation qc is set and the
        # result becomes mask(bits - 1), complemented when the source is
        # negative.  A zero amount copies srcElem1.
1806    vqshlSCode = '''
1807        int16_t shiftAmt = (int8_t)srcElem2;
1808        FPSCR fpscr = (FPSCR)Fpscr;
1809        if (shiftAmt < 0) {
1810            shiftAmt = -shiftAmt;
1811            if (shiftAmt >= sizeof(Element) * 8) {
1812                shiftAmt = sizeof(Element) * 8 - 1;
1813                destElem = 0;
1814            } else {
1815                destElem = (srcElem1 >> shiftAmt);
1816            }
1817            // Make sure the right shift sign extended when it should.
1818            if (srcElem1 < 0 && destElem >= 0) {
1819                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1820                                             1 - shiftAmt));
1821            }
1822        } else if (shiftAmt > 0) {
1823            bool sat = false;
1824            if (shiftAmt >= sizeof(Element) * 8) {
1825                if (srcElem1 != 0)
1826                    sat = true;
1827                else
1828                    destElem = 0;
1829            } else {
1830                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1831                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1832                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1833                    sat = true;
1834                } else {
1835                    destElem = srcElem1 << shiftAmt;
1836                }
1837            }
1838            if (sat) {
1839                fpscr.qc = 1;
1840                destElem = mask(sizeof(Element) * 8 - 1);
1841                if (srcElem1 < 0)
1842                    destElem = ~destElem;
1843            }
1844        } else {
1845            destElem = srcElem1;
1846        }
1847        Fpscr = fpscr;
1848    '''
1849    threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1850    threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1851
1852    vqrshlUCode = '''
1853        int16_t shiftAmt = (int8_t)srcElem2;
1854        FPSCR fpscr = (FPSCR)Fpscr;
1855        if (shiftAmt < 0) {
1856            shiftAmt = -shiftAmt;
1857            Element rBit = 0;
1858            if (shiftAmt <= sizeof(Element) * 8)
1859                rBit = bits(srcElem1, shiftAmt - 1);
1860            if (shiftAmt >= sizeof(Element) * 8) {
1861                shiftAmt = sizeof(Element) * 8 - 1;
1862                destElem = 0;
1863            } else {
1864                destElem = (srcElem1 >> shiftAmt);
1865            }
1866            destElem += rBit;
1867        } else {
1868            if (shiftAmt >= sizeof(Element) * 8) {
1869                if (srcElem1 != 0) {
1870                    destElem = mask(sizeof(Element) * 8);
1871                    fpscr.qc = 1;
1872                } else {
1873                    destElem = 0;
1874                }
1875            } else {
1876                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1877                            sizeof(Element) * 8 - shiftAmt)) {
1878                    destElem = mask(sizeof(Element) * 8);
1879                    fpscr.qc = 1;
1880                } else {
1881                    destElem = srcElem1 << shiftAmt;
1882                }
1883            }
1884        }
1885        Fpscr = fpscr;
1886    '''
1887    threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1888    threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1889
        # vqrshl (signed): saturating, rounding register shift.  It combines
        # the two patterns used by the snippets above:
        #  - right shifts capture rBit (the last bit shifted out, forced to 1
        #    when the shift exceeds the width and the source is negative),
        #    apply the clamp and sign fix-up, then add rBit;
        #  - left shifts use the same "sat" test and saturation values as the
        #    signed vqshl snippet and set qc on saturation.
        # A zero amount copies srcElem1.
1890    vqrshlSCode = '''
1891        int16_t shiftAmt = (int8_t)srcElem2;
1892        FPSCR fpscr = (FPSCR)Fpscr;
1893        if (shiftAmt < 0) {
1894            shiftAmt = -shiftAmt;
1895            Element rBit = 0;
1896            if (shiftAmt <= sizeof(Element) * 8)
1897                rBit = bits(srcElem1, shiftAmt - 1);
1898            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1899                rBit = 1;
1900            if (shiftAmt >= sizeof(Element) * 8) {
1901                shiftAmt = sizeof(Element) * 8 - 1;
1902                destElem = 0;
1903            } else {
1904                destElem = (srcElem1 >> shiftAmt);
1905            }
1906            // Make sure the right shift sign extended when it should.
1907            if (srcElem1 < 0 && destElem >= 0) {
1908                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1909                                             1 - shiftAmt));
1910            }
1911            destElem += rBit;
1912        } else if (shiftAmt > 0) {
1913            bool sat = false;
1914            if (shiftAmt >= sizeof(Element) * 8) {
1915                if (srcElem1 != 0)
1916                    sat = true;
1917                else
1918                    destElem = 0;
1919            } else {
1920                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1921                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1922                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1923                    sat = true;
1924                } else {
1925                    destElem = srcElem1 << shiftAmt;
1926                }
1927            }
1928            if (sat) {
1929                fpscr.qc = 1;
1930                destElem = mask(sizeof(Element) * 8 - 1);
1931                if (srcElem1 < 0)
1932                    destElem = ~destElem;
1933            }
1934        } else {
1935            destElem = srcElem1;
1936        }
1937        Fpscr = fpscr;
1938    '''
1939    threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1940    threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1941
        # Absolute-difference family.  The difference is always taken as
        # "larger minus smaller" so it is never negative.
        # vaba: accumulate the absolute difference into destElem (the snippet
        # uses +=, and the registration passes a trailing True).
1942    vabaCode = '''
1943        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1944                                            (srcElem2 - srcElem1);
1945    '''
1946    threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1947    threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
        # vabal: accumulating form with the operands cast to BigElement before
        # subtracting.
1948    vabalCode = '''
1949        destElem += (srcElem1 > srcElem2) ?
1950            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1951            ((BigElement)srcElem2 - (BigElement)srcElem1);
1952    '''
1953    threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1954
        # vabd: absolute difference written straight to destElem.
1955    vabdCode = '''
1956        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1957                                           (srcElem2 - srcElem1);
1958    '''
1959    threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1960    threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
        # vabdl: non-accumulating form with the operands cast to BigElement.
1961    vabdlCode = '''
1962        destElem = (srcElem1 > srcElem2) ?
1963            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1964            ((BigElement)srcElem2 - (BigElement)srcElem1);
1965    '''
1966    threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1967
        # vtst: bit test.  The result is all ones ((Element)(-1)) when the two
        # sources have at least one set bit in common, 0 otherwise.
1968    vtstCode = '''
1969        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1970    '''
1971    threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1972    threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1973
        # vmul: element-wise product in the element's own width.  Reused later
        # in the file for the two-register "Vmuls*" registrations.
1974    vmulCode = '''
1975        destElem = srcElem1 * srcElem2;
1976    '''
1977    threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1978    threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
        # vmull: product formed after casting both operands to BigElement.
1979    vmullCode = '''
1980        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1981    '''
1982    threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1983
        # vmla: multiply-accumulate; the old destElem is an input, and the
        # registrations pass a trailing True.
1984    vmlaCode = '''
1985        destElem = destElem + srcElem1 * srcElem2;
1986    '''
1987    threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
1988    threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
        # vmlal: multiply-accumulate with the product formed in BigElement.
1989    vmlalCode = '''
1990        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
1991    '''
1992    threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
1993
1994    vqdmlalCode = '''
1995        FPSCR fpscr = (FPSCR)Fpscr;
1996        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
1997        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
1998        Element halfNeg = maxNeg / 2;
1999        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2000            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2001            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2002            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2003            fpscr.qc = 1;
2004        }
2005        bool negPreDest = ltz(destElem);
2006        destElem += midElem;
2007        bool negDest = ltz(destElem);
2008        bool negMid = ltz(midElem);
2009        if (negPreDest == negMid && negMid != negDest) {
2010            destElem = mask(sizeof(BigElement) * 8 - 1);
2011            if (negPreDest)
2012                destElem = ~destElem;
2013            fpscr.qc = 1;
2014        }
2015        Fpscr = fpscr;
2016    '''
2017    threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2018
2019    vqdmlslCode = '''
2020        FPSCR fpscr = (FPSCR)Fpscr;
2021        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2022        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2023        Element halfNeg = maxNeg / 2;
2024        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2025            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2026            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2027            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2028            fpscr.qc = 1;
2029        }
2030        bool negPreDest = ltz(destElem);
2031        destElem -= midElem;
2032        bool negDest = ltz(destElem);
2033        bool posMid = ltz((BigElement)-midElem);
2034        if (negPreDest == posMid && posMid != negDest) {
2035            destElem = mask(sizeof(BigElement) * 8 - 1);
2036            if (negPreDest)
2037                destElem = ~destElem;
2038            fpscr.qc = 1;
2039        }
2040        Fpscr = fpscr;
2041    '''
2042    threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2043
        # vqdmull: saturating doubling multiply, long form.  destElem is
        # 2 * srcElem1 * srcElem2 computed in 64 bits.  The only case the
        # snippet saturates is both sources equal to 1 << (bits - 1), the most
        # negative value: destElem then becomes the complement of that value
        # shifted up by the element width, and qc is set.  Reused later in
        # the file for the two-register "Vqdmulls" registration.
2044    vqdmullCode = '''
2045        FPSCR fpscr = (FPSCR)Fpscr;
2046        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2047        if (srcElem1 == srcElem2 &&
2048                srcElem1 == (Element)((Element)1 <<
2049                    (Element)(sizeof(Element) * 8 - 1))) {
2050            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2051            fpscr.qc = 1;
2052        }
2053        Fpscr = fpscr;
2054    '''
2055    threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2056
        # vmls: multiply-subtract; the product is subtracted from the old
        # destElem, and the registrations pass a trailing True.
2057    vmlsCode = '''
2058        destElem = destElem - srcElem1 * srcElem2;
2059    '''
2060    threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2061    threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
        # vmlsl: multiply-subtract with the product formed in BigElement.
2062    vmlslCode = '''
2063        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2064    '''
2065    threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2066
        # vmul (polynomial): carry-less multiply.  For every set bit j of
        # srcElem2, srcElem1 shifted left by j is XORed into the result, so
        # partial products are combined without carries.  Registered under
        # the "vmul" mnemonic with distinct class names.
2067    vmulpCode = '''
2068        destElem = 0;
2069        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2070            if (bits(srcElem2, j))
2071                destElem ^= srcElem1 << j;
2072        }
2073    '''
2074    threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2075    threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
        # vmull (polynomial): same loop, but srcElem1 is cast to BigElement
        # before shifting so the shifted partial products are not truncated
        # to Element.
2076    vmullpCode = '''
2077        destElem = 0;
2078        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2079            if (bits(srcElem2, j))
2080                destElem ^= (BigElement)srcElem1 << j;
2081        }
2082    '''
2083    threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2084
        # Pairwise max: reuses vmaxCode (defined earlier in the file) with
        # pairwise=True.
2085    threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
2086    threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2087
        # Pairwise min: reuses vminCode with pairwise=True.
2088    threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2089    threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2090
        # vqdmulh: saturating doubling multiply returning the high half.  The
        # doubled 64-bit product is shifted right by the element width.  The
        # only case the snippet saturates is both sources equal to
        # 1 << (bits - 1), the most negative value: the result becomes
        # ~srcElem1 and qc is set.  Reused later in the file for the
        # two-register "Vqdmulhs*" registrations.
2091    vqdmulhCode = '''
2092        FPSCR fpscr = (FPSCR)Fpscr;
2093        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2094                   (sizeof(Element) * 8);
2095        if (srcElem1 == srcElem2 &&
2096                srcElem1 == (Element)((Element)1 <<
2097                    (sizeof(Element) * 8 - 1))) {
2098            destElem = ~srcElem1;
2099            fpscr.qc = 1;
2100        }
2101        Fpscr = fpscr;
2102    '''
2103    threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2104    threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2105
2106    vqrdmulhCode = '''
2107        FPSCR fpscr = (FPSCR)Fpscr;
2108        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2109                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2110                   (sizeof(Element) * 8);
2111        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2112        Element halfNeg = maxNeg / 2;
2113        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2114            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2115            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2116            if (destElem < 0) {
2117                destElem = mask(sizeof(Element) * 8 - 1);
2118            } else {
2119                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2120            }
2121            fpscr.qc = 1;
2122        }
2123        Fpscr = fpscr;
2124    '''
2125    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2126            smallSignedTypes, 2, vqrdmulhCode)
2127    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2128            smallSignedTypes, 4, vqrdmulhCode)
2129
        # Floating-point vmax.  processNans() is called first and reports via
        # "done" whether it already produced the result; only when it did not
        # is the maximum computed through binaryOp() with fpMaxS.  When done
        # is true and flushToZero(srcReg1, srcReg2) returns true, the idc bit
        # of the local FPSCR copy is set.  processNans, binaryOp, flushToZero
        # and fpMaxS are defined outside this chunk; the meaning of the two
        # "true" arguments to binaryOp cannot be seen here.
2130    vmaxfpCode = '''
2131        FPSCR fpscr = (FPSCR)Fpscr;
2132        bool done;
2133        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2134        if (!done) {
2135            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2136                               true, true, VfpRoundNearest);
2137        } else if (flushToZero(srcReg1, srcReg2)) {
2138            fpscr.idc = 1;
2139        }
2140        Fpscr = fpscr;
2141    '''
2142    threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2143    threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2144
        # Floating-point vmin: identical structure, using fpMinS.
2145    vminfpCode = '''
2146        FPSCR fpscr = (FPSCR)Fpscr;
2147        bool done;
2148        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2149        if (!done) {
2150            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2151                               true, true, VfpRoundNearest);
2152        } else if (flushToZero(srcReg1, srcReg2)) {
2153            fpscr.idc = 1;
2154        }
2155        Fpscr = fpscr;
2156    '''
2157    threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2158    threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2159
        # Pairwise floating-point max reuses vmaxfpCode with pairwise=True.
2160    threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2161                        2, vmaxfpCode, pairwise=True)
2162    threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2163                        4, vmaxfpCode, pairwise=True)
2164
        # Pairwise floating-point min reuses vminfpCode with pairwise=True.
2165    threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2166                        2, vminfpCode, pairwise=True)
2167    threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2168                        4, vminfpCode, pairwise=True)
2169
        # Floating-point add / sub / mul.  Each snippet copies the FPSCR
        # operand to a local, performs one binaryOp() call with the matching
        # function (fpAddS, fpSubS, fpMulS) and VfpRoundNearest, and writes
        # the local FPSCR back.  binaryOp and the fp*S functions are defined
        # outside this chunk.
2170    vaddfpCode = '''
2171        FPSCR fpscr = Fpscr;
2172        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2173                           true, true, VfpRoundNearest);
2174        Fpscr = fpscr;
2175    '''
2176    threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2177    threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2178
        # Pairwise floating-point add reuses vaddfpCode with pairwise=True.
2179    threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2180                        2, vaddfpCode, pairwise=True)
2181    threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2182                        4, vaddfpCode, pairwise=True)
2183
        # vsub (floating point).
2184    vsubfpCode = '''
2185        FPSCR fpscr = Fpscr;
2186        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2187                           true, true, VfpRoundNearest);
2188        Fpscr = fpscr;
2189    '''
2190    threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2191    threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2192
        # vmul (floating point).  Reused later in the file for the
        # two-register "Vmuls*Fp" registrations.
2193    vmulfpCode = '''
2194        FPSCR fpscr = Fpscr;
2195        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2196                           true, true, VfpRoundNearest);
2197        Fpscr = fpscr;
2198    '''
2199    threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2200    threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2201
        # vmla (floating point): the product is computed by one binaryOp()
        # call into the float temporary "mid", then a second binaryOp() call
        # adds mid to the old destReg.  Both calls share the same local FPSCR
        # copy.  The registrations pass a trailing True, as the integer
        # accumulate forms do.
2202    vmlafpCode = '''
2203        FPSCR fpscr = Fpscr;
2204        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2205                             true, true, VfpRoundNearest);
2206        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2207                           true, true, VfpRoundNearest);
2208        Fpscr = fpscr;
2209    '''
2210    threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2211    threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2212
        # vmls (floating point): same two-step structure; the second call
        # subtracts mid from the old destReg (operand order destReg, mid).
2213    vmlsfpCode = '''
2214        FPSCR fpscr = Fpscr;
2215        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2216                             true, true, VfpRoundNearest);
2217        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2218                           true, true, VfpRoundNearest);
2219        Fpscr = fpscr;
2220    '''
2221    threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2222    threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2223
        # Floating-point compares.  All five snippets share one shape: the
        # comparison function (vcgtFunc, vcgeFunc, vacgtFunc, vacgeFunc,
        # vceqFunc -- defined outside this chunk) is run through binaryOp()
        # and its float result "res" is decoded as follows:
        #   res == 0   -> destReg = -1
        #   otherwise  -> destReg = 0
        #   res == 2.0 -> additionally set the ioc bit in the local FPSCR.
        # NOTE(review): this implies the helper functions return 0 for "true"
        # and 2.0 to flag an invalid operation -- confirm against their
        # definitions.  Every registration passes toInt = True.
        # vcgt: greater-than.
2224    vcgtfpCode = '''
2225        FPSCR fpscr = (FPSCR)Fpscr;
2226        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2227                             true, true, VfpRoundNearest);
2228        destReg = (res == 0) ? -1 : 0;
2229        if (res == 2.0)
2230            fpscr.ioc = 1;
2231        Fpscr = fpscr;
2232    '''
2233    threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2234            2, vcgtfpCode, toInt = True)
2235    threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2236            4, vcgtfpCode, toInt = True)
2237
        # vcge: greater-than-or-equal.
2238    vcgefpCode = '''
2239        FPSCR fpscr = (FPSCR)Fpscr;
2240        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2241                             true, true, VfpRoundNearest);
2242        destReg = (res == 0) ? -1 : 0;
2243        if (res == 2.0)
2244            fpscr.ioc = 1;
2245        Fpscr = fpscr;
2246    '''
2247    threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2248            2, vcgefpCode, toInt = True)
2249    threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2250            4, vcgefpCode, toInt = True)
2251
        # vacgt: uses vacgtFunc (presumably the absolute-value form of the
        # greater-than compare -- confirm).
2252    vacgtfpCode = '''
2253        FPSCR fpscr = (FPSCR)Fpscr;
2254        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2255                             true, true, VfpRoundNearest);
2256        destReg = (res == 0) ? -1 : 0;
2257        if (res == 2.0)
2258            fpscr.ioc = 1;
2259        Fpscr = fpscr;
2260    '''
2261    threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2262            2, vacgtfpCode, toInt = True)
2263    threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2264            4, vacgtfpCode, toInt = True)
2265
        # vacge: uses vacgeFunc (presumably the absolute-value form of the
        # greater-than-or-equal compare -- confirm).
2266    vacgefpCode = '''
2267        FPSCR fpscr = (FPSCR)Fpscr;
2268        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2269                             true, true, VfpRoundNearest);
2270        destReg = (res == 0) ? -1 : 0;
2271        if (res == 2.0)
2272            fpscr.ioc = 1;
2273        Fpscr = fpscr;
2274    '''
2275    threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2276            2, vacgefpCode, toInt = True)
2277    threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2278            4, vacgefpCode, toInt = True)
2279
        # vceq: equality.
2280    vceqfpCode = '''
2281        FPSCR fpscr = (FPSCR)Fpscr;
2282        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2283                             true, true, VfpRoundNearest);
2284        destReg = (res == 0) ? -1 : 0;
2285        if (res == 2.0)
2286            fpscr.ioc = 1;
2287        Fpscr = fpscr;
2288    '''
2289    threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2290            2, vceqfpCode, toInt = True)
2291    threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2292            4, vceqfpCode, toInt = True)
2293
        # vrecps: a single binaryOp() call with fpRecpsS (defined outside this
        # chunk; presumably the reciprocal step function -- confirm), using
        # the same FPSCR copy-in / copy-out pattern as the other fp snippets.
2294    vrecpsCode = '''
2295        FPSCR fpscr = Fpscr;
2296        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2297                           true, true, VfpRoundNearest);
2298        Fpscr = fpscr;
2299    '''
2300    threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2301    threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2302
        # vrsqrts: identical structure with fpRSqrtsS (presumably the
        # reciprocal square-root step function -- confirm).
2303    vrsqrtsCode = '''
2304        FPSCR fpscr = Fpscr;
2305        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2306                           true, true, VfpRoundNearest);
2307        Fpscr = fpscr;
2308    '''
2309    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2310    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2311
        # vabd (floating point): the difference is computed through binaryOp()
        # with fpSubS into the float temporary "mid", and destReg receives
        # fabs(mid).  The absolute value is taken outside binaryOp, so it does
        # not touch the local FPSCR copy.
2312    vabdfpCode = '''
2313        FPSCR fpscr = Fpscr;
2314        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2315                             true, true, VfpRoundNearest);
2316        destReg = fabs(mid);
2317        Fpscr = fpscr;
2318    '''
2319    threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2320    threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2321
        # Second set of registrations for the multiply family.  These reuse
        # the snippets defined above but go through the twoEqualRegInst /
        # twoEqualRegInstFp / twoRegLongInst helpers (defined outside this
        # chunk), with class names carrying an extra "s".  NOTE(review):
        # presumably these are the by-scalar encodings -- confirm against the
        # helpers and the decoder.
        # vmla / vmlal.  NOTE(review): the integer vmla forms here are
        # registered for unsignedTypes while vmls and vmul below use allTypes
        # -- confirm this difference is intended.
2322    twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
2323    twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2324    twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2325    twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2326    twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2327
        # vmls / vmlsl.
2328    twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2329    twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2330    twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2331    twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2332    twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2333
        # vmul / vmull.
2334    twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2335    twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2336    twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2337    twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2338    twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2339
        # Saturating doubling multiplies.
2340    twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2341    twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2342    twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2343    twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2344    twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2345    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2346            smallSignedTypes, 2, vqrdmulhCode)
2347    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2348            smallSignedTypes, 4, vqrdmulhCode)
2349
    # VSHR (immediate).  When imm is at least the bit width of srcElem1 the
    # shift is not performed: the result is -1 if ltz(srcElem1) holds and 0
    # otherwise.  Smaller shift amounts use a plain >>.
2350    vshrCode = '''
2351        if (imm >= sizeof(srcElem1) * 8) {
2352            if (ltz(srcElem1))
2353                destElem = -1;
2354            else
2355                destElem = 0;
2356        } else {
2357            destElem = srcElem1 >> imm;
2358        }
2359    '''
2360    twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2361    twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2362
2363    vsraCode = '''
2364        Element mid;;
2365        if (imm >= sizeof(srcElem1) * 8) {
2366            mid = ltz(srcElem1) ? -1 : 0;
2367        } else {
2368            mid = srcElem1 >> imm;
2369            if (ltz(srcElem1) && !ltz(mid)) {
2370                mid |= -(mid & ((Element)1 <<
2371                            (sizeof(Element) * 8 - 1 - imm)));
2372            }
2373        }
2374        destElem += mid;
2375    '''
2376    twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2377    twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2378
    # VRSHR (rounding shift right).  rBit is bit (imm - 1) of the source and
    # is added to the shifted value.  The shift is split into (imm - 1)
    # followed by 1, presumably so that imm == element width never shifts by
    # the full width in one step.  imm above the width gives 0; imm == 0
    # copies the source.
2379    vrshrCode = '''
2380        if (imm > sizeof(srcElem1) * 8) {
2381            destElem = 0;
2382        } else if (imm) {
2383            Element rBit = bits(srcElem1, imm - 1);
2384            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2385        } else {
2386            destElem = srcElem1;
2387        }
2388    '''
2389    twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2390    twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2391
    # VRSRA (rounding shift right and accumulate).  Same three cases as
    # vrshrCode, but each result is added to destElem instead of replacing it
    # (the out-of-range case adds 0).
2392    vrsraCode = '''
2393        if (imm > sizeof(srcElem1) * 8) {
2394            destElem += 0;
2395        } else if (imm) {
2396            Element rBit = bits(srcElem1, imm - 1);
2397            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2398        } else {
2399            destElem += srcElem1;
2400        }
2401    '''
2402    twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2403    twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2404
    # VSRI (shift right and insert).  The shifted source is OR-ed with the
    # bits of destElem selected by ~mask(width - imm); assuming mask(n) yields
    # the low n bits, those are the top imm bits of the old destination.
    # imm >= element width leaves destElem unchanged.
2405    vsriCode = '''
2406        if (imm >= sizeof(Element) * 8)
2407            destElem = destElem;
2408        else
2409            destElem = (srcElem1 >> imm) |
2410                (destElem & ~mask(sizeof(Element) * 8 - imm));
2411    '''
2412    twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2413    twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2414
    # VSHL (immediate).  For imm >= element width the shift is carried out as
    # (width - 1) followed by 1 rather than as a single shift by imm;
    # otherwise a plain << imm is used.
2415    vshlCode = '''
2416        if (imm >= sizeof(Element) * 8)
2417            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2418        else
2419            destElem = srcElem1 << imm;
2420    '''
2421    twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2422    twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2423
    # VSLI (shift left and insert).  The shifted source is OR-ed with
    # destElem & mask(imm), i.e. the part of the old destination selected by
    # mask(imm) is preserved.  imm >= element width leaves destElem unchanged.
2424    vsliCode = '''
2425        if (imm >= sizeof(Element) * 8)
2426            destElem = destElem;
2427        else
2428            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2429    '''
2430    twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2431    twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2432
    # VQSHL (immediate), instantiated for signedTypes.
    #   - imm >= width: any non-zero source saturates; zero stays zero.
    #   - 0 < imm < width: topBits holds source bits [width-1 : width-1-imm]
    #     (imm + 1 bits).  Unless they are all zero or equal mask(imm + 1),
    #     the result saturates.
    #   - imm == 0: the source is copied.
    # Saturation starts from 1 << (width - 1) and inverts it when the source
    # is positive; fpscr.qc is set whenever saturation happens.
2433    vqshlCode = '''
2434        FPSCR fpscr = (FPSCR)Fpscr;
2435        if (imm >= sizeof(Element) * 8) {
2436            if (srcElem1 != 0) {
2437                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2438                if (srcElem1 > 0)
2439                    destElem = ~destElem;
2440                fpscr.qc = 1;
2441            } else {
2442                destElem = 0;
2443            }
2444        } else if (imm) {
2445            destElem = (srcElem1 << imm);
2446            uint64_t topBits = bits((uint64_t)srcElem1,
2447                                    sizeof(Element) * 8 - 1,
2448                                    sizeof(Element) * 8 - 1 - imm);
2449            if (topBits != 0 && topBits != mask(imm + 1)) {
2450                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2451                if (srcElem1 > 0)
2452                    destElem = ~destElem;
2453                fpscr.qc = 1;
2454            }
2455        } else {
2456            destElem = srcElem1;
2457        }
2458        Fpscr = fpscr;
2459    '''
2460    twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2461    twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2462
    # Saturating shift left instantiated for unsignedTypes (mnemonic string
    # "vqshlu").  Overflow is detected from the top imm source bits
    # (bits [width-1 : width-imm]); on overflow destElem becomes
    # mask(sizeof(Element) * 8) and fpscr.qc is set.  imm >= width saturates
    # every non-zero source; imm == 0 copies the source.
2463    vqshluCode = '''
2464        FPSCR fpscr = (FPSCR)Fpscr;
2465        if (imm >= sizeof(Element) * 8) {
2466            if (srcElem1 != 0) {
2467                destElem = mask(sizeof(Element) * 8);
2468                fpscr.qc = 1;
2469            } else {
2470                destElem = 0;
2471            }
2472        } else if (imm) {
2473            destElem = (srcElem1 << imm);
2474            uint64_t topBits = bits((uint64_t)srcElem1,
2475                                    sizeof(Element) * 8 - 1,
2476                                    sizeof(Element) * 8 - imm);
2477            if (topBits != 0) {
2478                destElem = mask(sizeof(Element) * 8);
2479                fpscr.qc = 1;
2480            }
2481        } else {
2482            destElem = srcElem1;
2483        }
2484        Fpscr = fpscr;
2485    '''
2486    twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2487    twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2488
    # Saturating shift left instantiated for signedTypes (mnemonic string
    # "vqshlus").  In every branch a negative source produces 0 with
    # fpscr.qc set.  A non-negative source that overflows (non-zero top imm
    # bits, or any non-zero value when imm >= width) produces
    # mask(sizeof(Element) * 8) with fpscr.qc set.
2489    vqshlusCode = '''
2490        FPSCR fpscr = (FPSCR)Fpscr;
2491        if (imm >= sizeof(Element) * 8) {
2492            if (srcElem1 < 0) {
2493                destElem = 0;
2494                fpscr.qc = 1;
2495            } else if (srcElem1 > 0) {
2496                destElem = mask(sizeof(Element) * 8);
2497                fpscr.qc = 1;
2498            } else {
2499                destElem = 0;
2500            }
2501        } else if (imm) {
2502            destElem = (srcElem1 << imm);
2503            uint64_t topBits = bits((uint64_t)srcElem1,
2504                                    sizeof(Element) * 8 - 1,
2505                                    sizeof(Element) * 8 - imm);
2506            if (srcElem1 < 0) {
2507                destElem = 0;
2508                fpscr.qc = 1;
2509            } else if (topBits != 0) {
2510                destElem = mask(sizeof(Element) * 8);
2511                fpscr.qc = 1;
2512            }
2513        } else {
2514            if (srcElem1 < 0) {
2515                fpscr.qc = 1;
2516                destElem = 0;
2517            } else {
2518                destElem = srcElem1;
2519            }
2520        }
2521        Fpscr = fpscr;
2522    '''
2523    twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2524    twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2525
    # VSHRN (shift right, narrow).  Registered through
    # twoRegNarrowShiftInst.  imm at or above the bit width of srcElem1
    # yields 0; otherwise the shifted source is assigned to destElem.
2526    vshrnCode = '''
2527        if (imm >= sizeof(srcElem1) * 8) {
2528            destElem = 0;
2529        } else {
2530            destElem = srcElem1 >> imm;
2531        }
2532    '''
2533    twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2534
    # VRSHRN (rounding shift right, narrow).  Adds bit (imm - 1) of the source
    # to the shifted value; the shift is done as (imm - 1) then 1.  imm above
    # the width of srcElem1 yields 0 and imm == 0 copies the source.
2535    vrshrnCode = '''
2536        if (imm > sizeof(srcElem1) * 8) {
2537            destElem = 0;
2538        } else if (imm) {
2539            Element rBit = bits(srcElem1, imm - 1);
2540            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2541        } else {
2542            destElem = srcElem1;
2543        }
2544    '''
2545    twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2546
    # VQSHRN for smallSignedTypes.  The shifted value "mid" is sign-extended
    # by OR-ing in the negation of the bit at position
    # (sizeof(BigElement) * 8 - 1 - imm).  If mid does not survive a cast to
    # Element the result saturates to mask(width - 1), inverted for negative
    # sources, and fpscr.qc is set.
    # NOTE(review): the imm == 0 branch copies srcElem1 without a range check,
    # unlike vqrshrnCode - confirm this is intended.
2547    vqshrnCode = '''
2548        FPSCR fpscr = (FPSCR)Fpscr;
2549        if (imm > sizeof(srcElem1) * 8) {
2550            if (srcElem1 != 0 && srcElem1 != -1)
2551                fpscr.qc = 1;
2552            destElem = 0;
2553        } else if (imm) {
2554            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2555            mid |= -(mid & ((BigElement)1 <<
2556                        (sizeof(BigElement) * 8 - 1 - imm)));
2557            if (mid != (Element)mid) {
2558                destElem = mask(sizeof(Element) * 8 - 1);
2559                if (srcElem1 < 0)
2560                    destElem = ~destElem;
2561                fpscr.qc = 1;
2562            } else {
2563                destElem = mid;
2564            }
2565        } else {
2566            destElem = srcElem1;
2567        }
2568        Fpscr = fpscr;
2569    '''
2570    twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2571
    # Saturating narrowing shift right for smallUnsignedTypes (mnemonic string
    # "vqshrun").  When the shifted value does not survive a cast to Element
    # the result becomes mask(sizeof(Element) * 8) and fpscr.qc is set.
    # imm above the source width yields 0, flagging qc for non-zero sources.
2572    vqshrunCode = '''
2573        FPSCR fpscr = (FPSCR)Fpscr;
2574        if (imm > sizeof(srcElem1) * 8) {
2575            if (srcElem1 != 0)
2576                fpscr.qc = 1;
2577            destElem = 0;
2578        } else if (imm) {
2579            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2580            if (mid != (Element)mid) {
2581                destElem = mask(sizeof(Element) * 8);
2582                fpscr.qc = 1;
2583            } else {
2584                destElem = mid;
2585            }
2586        } else {
2587            destElem = srcElem1;
2588        }
2589        Fpscr = fpscr;
2590    '''
2591    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2592            smallUnsignedTypes, vqshrunCode)
2593
    # "vqshrun" instantiated for smallSignedTypes (class NVqshruns).  After the
    # shift, any set bit of mid at or above position sizeof(Element) * 8
    # triggers saturation: negative sources give 0, others give
    # mask(sizeof(Element) * 8); fpscr.qc is set in both cases.
2594    vqshrunsCode = '''
2595        FPSCR fpscr = (FPSCR)Fpscr;
2596        if (imm > sizeof(srcElem1) * 8) {
2597            if (srcElem1 != 0)
2598                fpscr.qc = 1;
2599            destElem = 0;
2600        } else if (imm) {
2601            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2602            if (bits(mid, sizeof(BigElement) * 8 - 1,
2603                          sizeof(Element) * 8) != 0) {
2604                if (srcElem1 < 0) {
2605                    destElem = 0;
2606                } else {
2607                    destElem = mask(sizeof(Element) * 8);
2608                }
2609                fpscr.qc = 1;
2610            } else {
2611                destElem = mid;
2612            }
2613        } else {
2614            destElem = srcElem1;
2615        }
2616        Fpscr = fpscr;
2617    '''
2618    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2619            smallSignedTypes, vqshrunsCode)
2620
    # VQRSHRN for smallSignedTypes: rounding variant of the saturating
    # narrowing shift.  rBit is the last bit shifted out; mid is sign-extended
    # and rBit is added before the range check.  Values that do not survive a
    # cast to Element saturate to mask(width - 1), inverted for negative
    # sources, with fpscr.qc set.  The imm == 0 branch applies the same range
    # check directly to srcElem1.
2621    vqrshrnCode = '''
2622        FPSCR fpscr = (FPSCR)Fpscr;
2623        if (imm > sizeof(srcElem1) * 8) {
2624            if (srcElem1 != 0 && srcElem1 != -1)
2625                fpscr.qc = 1;
2626            destElem = 0;
2627        } else if (imm) {
2628            BigElement mid = (srcElem1 >> (imm - 1));
2629            uint64_t rBit = mid & 0x1;
2630            mid >>= 1;
2631            mid |= -(mid & ((BigElement)1 <<
2632                        (sizeof(BigElement) * 8 - 1 - imm)));
2633            mid += rBit;
2634            if (mid != (Element)mid) {
2635                destElem = mask(sizeof(Element) * 8 - 1);
2636                if (srcElem1 < 0)
2637                    destElem = ~destElem;
2638                fpscr.qc = 1;
2639            } else {
2640                destElem = mid;
2641            }
2642        } else {
2643            if (srcElem1 != (Element)srcElem1) {
2644                destElem = mask(sizeof(Element) * 8 - 1);
2645                if (srcElem1 < 0)
2646                    destElem = ~destElem;
2647                fpscr.qc = 1;
2648            } else {
2649                destElem = srcElem1;
2650            }
2651        }
2652        Fpscr = fpscr;
2653    '''
2654    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2655            smallSignedTypes, vqrshrnCode)
2656
2657    vqrshrunCode = '''
2658        FPSCR fpscr = (FPSCR)Fpscr;
2659        if (imm > sizeof(srcElem1) * 8) {
2660            if (srcElem1 != 0)
2661                fpscr.qc = 1;
2662            destElem = 0;
2663        } else if (imm) {
2664            BigElement mid = (srcElem1 >> (imm - 1));
2665            uint64_t rBit = mid & 0x1;
2666            mid >>= 1;
2667            mid += rBit;
2668            if (mid != (Element)mid) {
2669                destElem = mask(sizeof(Element) * 8);
2670                fpscr.qc = 1;
2671            } else {
2672                destElem = mid;
2673            }
2674        } else {
2675            if (srcElem1 != (Element)srcElem1) {
2676                destElem = mask(sizeof(Element) * 8 - 1);
2677                fpscr.qc = 1;
2678            } else {
2679                destElem = srcElem1;
2680            }
2681        }
2682        Fpscr = fpscr;
2683    '''
2684    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2685            smallUnsignedTypes, vqrshrunCode)
2686
    # "vqrshrun" instantiated for smallSignedTypes (class NVqrshruns).  mid is
    # the shifted source, sign-extended, plus the rounding bit.  Any set bit
    # of mid at or above position sizeof(Element) * 8 triggers saturation:
    # 0 for negative sources, mask(sizeof(Element) * 8) otherwise, with
    # fpscr.qc set.  With imm == 0 a negative source gives 0 and sets qc.
2687    vqrshrunsCode = '''
2688        FPSCR fpscr = (FPSCR)Fpscr;
2689        if (imm > sizeof(srcElem1) * 8) {
2690            if (srcElem1 != 0)
2691                fpscr.qc = 1;
2692            destElem = 0;
2693        } else if (imm) {
2694            BigElement mid = (srcElem1 >> (imm - 1));
2695            uint64_t rBit = mid & 0x1;
2696            mid >>= 1;
2697            mid |= -(mid & ((BigElement)1 <<
2698                            (sizeof(BigElement) * 8 - 1 - imm)));
2699            mid += rBit;
2700            if (bits(mid, sizeof(BigElement) * 8 - 1,
2701                          sizeof(Element) * 8) != 0) {
2702                if (srcElem1 < 0) {
2703                    destElem = 0;
2704                } else {
2705                    destElem = mask(sizeof(Element) * 8);
2706                }
2707                fpscr.qc = 1;
2708            } else {
2709                destElem = mid;
2710            }
2711        } else {
2712            if (srcElem1 < 0) {
2713                fpscr.qc = 1;
2714                destElem = 0;
2715            } else {
2716                destElem = srcElem1;
2717            }
2718        }
2719        Fpscr = fpscr;
2720    '''
2721    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2722            smallSignedTypes, vqrshrunsCode)
2723
    # VSHLL (shift left long).  The source is cast to BigElement before the
    # shift; imm at or above the bit width of destElem yields 0.
2724    vshllCode = '''
2725        if (imm >= sizeof(destElem) * 8) {
2726            destElem = 0;
2727        } else {
2728            destElem = (BigElement)srcElem1 << imm;
2729        }
2730    '''
2731    twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2732
    # VMOVL: a plain element copy, registered through the same
    # twoRegLongShiftInst helper that vshll uses.
2733    vmovlCode = '''
2734        destElem = srcElem1;
2735    '''
2736    twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2737
    # VCVT, float to fixed point via vfpFpSToFixed(srcElem1, false, false, imm).
    # NOTE(review): the second argument is false here and true in
    # vcvt2sfxCode, presumably selecting unsigned vs. signed - confirm.
    # flushToZero() on the source sets fpscr.idc when it returns true.  The
    # conversion is bracketed by prepFpState / finishVfp, and the empty asm
    # statements carry memory constraints on the operands.
2738    vcvt2ufxCode = '''
2739        FPSCR fpscr = Fpscr;
2740        if (flushToZero(srcElem1))
2741            fpscr.idc = 1;
2742        VfpSavedState state = prepFpState(VfpRoundNearest);
2743        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2744        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2745        __asm__ __volatile__("" :: "m" (destReg));
2746        finishVfp(fpscr, state, true);
2747        Fpscr = fpscr;
2748    '''
2749    twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2750            2, vcvt2ufxCode, toInt = True)
2751    twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2752            4, vcvt2ufxCode, toInt = True)
2753
    # VCVT, float to fixed point via vfpFpSToFixed(srcElem1, true, false, imm).
    # Identical to vcvt2ufxCode apart from the second argument being true
    # (presumably the signed conversion - confirm).
2754    vcvt2sfxCode = '''
2755        FPSCR fpscr = Fpscr;
2756        if (flushToZero(srcElem1))
2757            fpscr.idc = 1;
2758        VfpSavedState state = prepFpState(VfpRoundNearest);
2759        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2760        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2761        __asm__ __volatile__("" :: "m" (destReg));
2762        finishVfp(fpscr, state, true);
2763        Fpscr = fpscr;
2764    '''
2765    twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2766            2, vcvt2sfxCode, toInt = True)
2767    twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2768            4, vcvt2sfxCode, toInt = True)
2769
    # VCVT, fixed point to float via vfpUFixedToFpS (the "U" helper, so
    # presumably unsigned input - confirm).  Registered with fromInt = True;
    # the conversion is bracketed by prepFpState / finishVfp.
2770    vcvtu2fpCode = '''
2771        FPSCR fpscr = Fpscr;
2772        VfpSavedState state = prepFpState(VfpRoundNearest);
2773        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2774        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2775        __asm__ __volatile__("" :: "m" (destElem));
2776        finishVfp(fpscr, state, true);
2777        Fpscr = fpscr;
2778    '''
2779    twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2780            2, vcvtu2fpCode, fromInt = True)
2781    twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2782            4, vcvtu2fpCode, fromInt = True)
2783
    # VCVT, fixed point to float via vfpSFixedToFpS (the "S" helper, so
    # presumably signed input - confirm).  Otherwise the same sequence as
    # vcvtu2fpCode, registered with fromInt = True.
2784    vcvts2fpCode = '''
2785        FPSCR fpscr = Fpscr;
2786        VfpSavedState state = prepFpState(VfpRoundNearest);
2787        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2788        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2789        __asm__ __volatile__("" :: "m" (destElem));
2790        finishVfp(fpscr, state, true);
2791        Fpscr = fpscr;
2792    '''
2793    twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2794            2, vcvts2fpCode, fromInt = True)
2795    twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2796            4, vcvts2fpCode, fromInt = True)
2797
    # VCVT, single to half precision (narrowing).  The source bits are turned
    # into a float with bitsToFp, flushToZero() sets fpscr.idc when it returns
    # true, and vcvtFpSFpH performs the conversion with fpscr.ahp passed
    # through.  Instantiated for uint16_t elements.
2798    vcvts2hCode = '''
2799        FPSCR fpscr = Fpscr;
2800        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2801        if (flushToZero(srcFp1))
2802            fpscr.idc = 1;
2803        VfpSavedState state = prepFpState(VfpRoundNearest);
2804        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2805                                : "m" (srcFp1), "m" (destElem));
2806        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2807                              fpscr.ahp, srcFp1);
2808        __asm__ __volatile__("" :: "m" (destElem));
2809        finishVfp(fpscr, state, true);
2810        Fpscr = fpscr;
2811    '''
2812    twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2813
    # VCVT, half to single precision (long form).  vcvtFpHFpS converts the
    # source element (fpscr.ahp passed through) and fpToBits stores the
    # result's bit pattern in destElem.  Instantiated for uint16_t elements.
2814    vcvth2sCode = '''
2815        FPSCR fpscr = Fpscr;
2816        VfpSavedState state = prepFpState(VfpRoundNearest);
2817        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2818                                : "m" (srcElem1), "m" (destElem));
2819        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2820        __asm__ __volatile__("" :: "m" (destElem));
2821        finishVfp(fpscr, state, true);
2822        Fpscr = fpscr;
2823    '''
2824    twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2825
    # VRSQRTE, integer form: delegates to unsignedRSqrtEstimate; instantiated
    # for uint32_t elements only.
2826    vrsqrteCode = '''
2827        destElem = unsignedRSqrtEstimate(srcElem1);
2828    '''
2829    twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2830    twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2831
    # VRSQRTE, floating-point form: sets fpscr.idc when flushToZero(srcReg1)
    # returns true, then delegates to fprSqrtEstimate.
2832    vrsqrtefpCode = '''
2833        FPSCR fpscr = Fpscr;
2834        if (flushToZero(srcReg1))
2835            fpscr.idc = 1;
2836        destReg = fprSqrtEstimate(fpscr, srcReg1);
2837        Fpscr = fpscr;
2838    '''
2839    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2840    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2841
    # VRECPE, integer form: delegates to unsignedRecipEstimate; instantiated
    # for uint32_t elements only.
2842    vrecpeCode = '''
2843        destElem = unsignedRecipEstimate(srcElem1);
2844    '''
2845    twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2846    twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2847
    # VRECPE, floating-point form: sets fpscr.idc when flushToZero(srcReg1)
    # returns true, then delegates to fpRecipEstimate.
2848    vrecpefpCode = '''
2849        FPSCR fpscr = Fpscr;
2850        if (flushToZero(srcReg1))
2851            fpscr.idc = 1;
2852        destReg = fpRecipEstimate(fpscr, srcReg1);
2853        Fpscr = fpscr;
2854    '''
2855    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2856    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2857
    # VREV16 / VREV32 / VREV64.  Each snippet copies the element unchanged and
    # computes j = i ^ (groupSize - 1), where groupSize is the number of
    # elements in a 2-, 4- or 8-byte group ((1 << k) / sizeof(Element)).
    # NOTE(review): i and j come from the surrounding template, presumably the
    # source and destination element indices - confirm.
2858    vrev16Code = '''
2859        destElem = srcElem1;
2860        unsigned groupSize = ((1 << 1) / sizeof(Element));
2861        unsigned reverseMask = (groupSize - 1);
2862        j = i ^ reverseMask;
2863    '''
2864    twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2865    twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
    # 4-byte groups: instantiated for 8- and 16-bit elements.
2866    vrev32Code = '''
2867        destElem = srcElem1;
2868        unsigned groupSize = ((1 << 2) / sizeof(Element));
2869        unsigned reverseMask = (groupSize - 1);
2870        j = i ^ reverseMask;
2871    '''
2872    twoRegMiscInst("vrev32", "NVrev32D",
2873            ("uint8_t", "uint16_t"), 2, vrev32Code)
2874    twoRegMiscInst("vrev32", "NVrev32Q",
2875            ("uint8_t", "uint16_t"), 4, vrev32Code)
    # 8-byte groups: instantiated for smallUnsignedTypes.
2876    vrev64Code = '''
2877        destElem = srcElem1;
2878        unsigned groupSize = ((1 << 3) / sizeof(Element));
2879        unsigned reverseMask = (groupSize - 1);
2880        j = i ^ reverseMask;
2881    '''
2882    twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2883    twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2884
    # VPADDL: both source elements are cast to BigElement before being added,
    # and the sum is assigned to destElem.
2885    vpaddlCode = '''
2886        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2887    '''
2888    twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2889    twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2890
    # VPADAL: same widened pairwise sum as vpaddlCode, but added to the
    # existing destElem; the trailing True is passed for this accumulating
    # form.
2891    vpadalCode = '''
2892        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2893    '''
2894    twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2895    twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2896
    # VCLS (count leading sign bits).  The source is shifted left once to drop
    # the sign bit, then the loop counts how many following bits keep the
    # value on the same side of zero as the original sign, capped at
    # width - 1.
2897    vclsCode = '''
2898        unsigned count = 0;
2899        if (srcElem1 < 0) {
2900            srcElem1 <<= 1;
2901            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2902                count++;
2903                srcElem1 <<= 1;
2904            }
2905        } else {
2906            srcElem1 <<= 1;
2907            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2908                count++;
2909                srcElem1 <<= 1;
2910            }
2911        }
2912        destElem = count;
2913    '''
2914    twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2915    twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2916
    # VCLZ (count leading zeros).  Instantiated for signedTypes so that
    # "srcElem1 >= 0" tests the top bit; the source is shifted left until the
    # top bit is set or the count reaches the element width.
2917    vclzCode = '''
2918        unsigned count = 0;
2919        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2920            count++;
2921            srcElem1 <<= 1;
2922        }
2923        destElem = count;
2924    '''
2925    twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2926    twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2927
    # VCNT (population count): adds the low bit to the running total and
    # shifts the source right until it becomes zero.
    # NOTE(review): the loop guard bounds "count" (the number of set bits
    # found so far), not the number of iterations.
2928    vcntCode = '''
2929        unsigned count = 0;
2930        while (srcElem1 && count < sizeof(Element) * 8) {
2931            count += srcElem1 & 0x1;
2932            srcElem1 >>= 1;
2933        }
2934        destElem = count;
2935    '''
2936    twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2937    twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2938
    # VMVN: bitwise complement of the source; instantiated for uint64_t
    # elements only.
2939    vmvnCode = '''
2940        destElem = ~srcElem1;
2941    '''
2942    twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2943    twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2944
    # VQABS (saturating absolute value).  A source equal to
    # 1 << (width - 1) cannot be negated, so destElem becomes ~srcElem1 and
    # fpscr.qc is set.  Other negative sources are negated; non-negative
    # sources are copied.
2945    vqabsCode = '''
2946        FPSCR fpscr = (FPSCR)Fpscr;
2947        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2948            fpscr.qc = 1;
2949            destElem = ~srcElem1;
2950        } else if (srcElem1 < 0) {
2951            destElem = -srcElem1;
2952        } else {
2953            destElem = srcElem1;
2954        }
2955        Fpscr = fpscr;
2956    '''
2957    twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2958    twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2959
    # VQNEG (saturating negate).  A source equal to 1 << (width - 1) yields
    # ~srcElem1 with fpscr.qc set; every other source is simply negated.
2960    vqnegCode = '''
2961        FPSCR fpscr = (FPSCR)Fpscr;
2962        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2963            fpscr.qc = 1;
2964            destElem = ~srcElem1;
2965        } else {
2966            destElem = -srcElem1;
2967        }
2968        Fpscr = fpscr;
2969    '''
2970    twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2971    twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2972
    # VABS, integer form: negative sources are negated, others copied.  No
    # saturation handling and no FPSCR access, in contrast to vqabsCode.
2973    vabsCode = '''
2974        if (srcElem1 < 0) {
2975            destElem = -srcElem1;
2976        } else {
2977            destElem = srcElem1;
2978        }
2979    '''
2980    twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2981    twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
    # VABS, floating-point form: the float is reinterpreted as a uint32_t
    # through a union and AND-ed with mask(sizeof(Element) * 8 - 1), which
    # drops the top bit, before being read back as a float.
2982    vabsfpCode = '''
2983        union
2984        {
2985            uint32_t i;
2986            float f;
2987        } cStruct;
2988        cStruct.f = srcReg1;
2989        cStruct.i &= mask(sizeof(Element) * 8 - 1);
2990        destReg = cStruct.f;
2991    '''
2992    twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2993    twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
2994
    # VNEG: plain unary minus, in an integer form (signedTypes) and a
    # floating-point form that operates on srcReg1 / destReg.
2995    vnegCode = '''
2996        destElem = -srcElem1;
2997    '''
2998    twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2999    twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
3000    vnegfpCode = '''
3001        destReg = -srcReg1;
3002    '''
3003    twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
3004    twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3005
    # VCGT against zero.  Integer form: mask(sizeof(Element) * 8) when the
    # source is greater than 0, else 0.  Floating-point form: vcgtFunc is
    # applied via binaryOp with 0.0 as the second operand; a result of 0
    # writes -1, any other result writes 0, and 2.0 also sets fpscr.ioc.
3006    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3007    twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3008    twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
3009    vcgtfpCode = '''
3010        FPSCR fpscr = (FPSCR)Fpscr;
3011        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3012                             true, true, VfpRoundNearest);
3013        destReg = (res == 0) ? -1 : 0;
3014        if (res == 2.0)
3015            fpscr.ioc = 1;
3016        Fpscr = fpscr;
3017    '''
3018    twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3019            2, vcgtfpCode, toInt = True)
3020    twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3021            4, vcgtfpCode, toInt = True)
3022
    # VCGE against zero.  Integer form: mask(sizeof(Element) * 8) when the
    # source is >= 0, else 0.  Floating-point form: vcgeFunc via binaryOp with
    # 0.0 as the second operand; result 0 writes -1, otherwise 0, and a result
    # of 2.0 also sets fpscr.ioc.
3023    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3024    twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3025    twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
3026    vcgefpCode = '''
3027        FPSCR fpscr = (FPSCR)Fpscr;
3028        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3029                             true, true, VfpRoundNearest);
3030        destReg = (res == 0) ? -1 : 0;
3031        if (res == 2.0)
3032            fpscr.ioc = 1;
3033        Fpscr = fpscr;
3034    '''
3035    twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3036            2, vcgefpCode, toInt = True)
3037    twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3038            4, vcgefpCode, toInt = True)
3039
    # VCEQ against zero.  Integer form: mask(sizeof(Element) * 8) when the
    # source equals 0, else 0.  The floating-point snippet rebinds the name
    # vceqfpCode (also assigned earlier for the register form), this time with
    # 0.0 as the second operand to binaryOp.
3040    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3041    twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3042    twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
3043    vceqfpCode = '''
3044        FPSCR fpscr = (FPSCR)Fpscr;
3045        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3046                             true, true, VfpRoundNearest);
3047        destReg = (res == 0) ? -1 : 0;
3048        if (res == 2.0)
3049            fpscr.ioc = 1;
3050        Fpscr = fpscr;
3051    '''
3052    twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3053            2, vceqfpCode, toInt = True)
3054    twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3055            4, vceqfpCode, toInt = True)
3056
    # VCLE against zero.  Integer form: mask(sizeof(Element) * 8) when the
    # source is <= 0, else 0.  Floating-point form: vcleFunc via binaryOp with
    # 0.0 as the second operand; result 0 writes -1, otherwise 0, and a result
    # of 2.0 also sets fpscr.ioc.
3057    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3058    twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3059    twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
3060    vclefpCode = '''
3061        FPSCR fpscr = (FPSCR)Fpscr;
3062        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3063                             true, true, VfpRoundNearest);
3064        destReg = (res == 0) ? -1 : 0;
3065        if (res == 2.0)
3066            fpscr.ioc = 1;
3067        Fpscr = fpscr;
3068    '''
3069    twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3070            2, vclefpCode, toInt = True)
3071    twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3072            4, vclefpCode, toInt = True)
3073
    # VCLT against zero.  Integer form: mask(sizeof(Element) * 8) when the
    # source is < 0, else 0.  Floating-point form: vcltFunc via binaryOp with
    # 0.0 as the second operand; result 0 writes -1, otherwise 0, and a result
    # of 2.0 also sets fpscr.ioc.
3074    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3075    twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3076    twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
3077    vcltfpCode = '''
3078        FPSCR fpscr = (FPSCR)Fpscr;
3079        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3080                             true, true, VfpRoundNearest);
3081        destReg = (res == 0) ? -1 : 0;
3082        if (res == 2.0)
3083            fpscr.ioc = 1;
3084        Fpscr = fpscr;
3085    '''
3086    twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3087            2, vcltfpCode, toInt = True)
3088    twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3089            4, vcltfpCode, toInt = True)
3090
    # VSWP: exchanges srcReg1.regs[r] and destReg.regs[r] for every r below
    # rCount, using "mid" as the temporary.  Both operands are written.
3091    vswpCode = '''
3092        FloatRegBits mid;
3093        for (unsigned r = 0; r < rCount; r++) {
3094            mid = srcReg1.regs[r];
3095            srcReg1.regs[r] = destReg.regs[r];
3096            destReg.regs[r] = mid;
3097        }
3098    '''
3099    twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3100    twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3101
    # VTRN: for every even index i, srcReg1.elements[i] is exchanged with
    # destReg.elements[i + 1].  Both operands are written.
3102    vtrnCode = '''
3103        Element mid;
3104        for (unsigned i = 0; i < eCount; i += 2) {
3105            mid = srcReg1.elements[i];
3106            srcReg1.elements[i] = destReg.elements[i + 1];
3107            destReg.elements[i + 1] = mid;
3108        }
3109    '''
3110    twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3111    twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3112
    # VUZP (unzip).  "mid" is a snapshot of srcReg1 taken before it is
    # overwritten.  After the loops:
    #   destReg = even elements of old destReg, then even elements of old
    #             srcReg1;
    #   srcReg1 = odd elements of old destReg, then odd elements of old
    #             srcReg1.
    # The upper half of destReg is filled in a second loop, after the first
    # loop has finished reading the original destReg elements.
3113    vuzpCode = '''
3114        Element mid[eCount];
3115        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3116        for (unsigned i = 0; i < eCount / 2; i++) {
3117            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3118            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3119            destReg.elements[i] = destReg.elements[2 * i];
3120        }
3121        for (unsigned i = 0; i < eCount / 2; i++) {
3122            destReg.elements[eCount / 2 + i] = mid[2 * i];
3123        }
3124    '''
3125    twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3126    twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3127
3128    vzipCode = '''
3129        Element mid[eCount];
3130        memcpy(&mid, &destReg, sizeof(destReg));
3131        for (unsigned i = 0; i < eCount / 2; i++) {
3132            destReg.elements[2 * i] = mid[i];
3133            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3134        }
3135        for (int i = 0; i < eCount / 2; i++) {
3136            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3137            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3138        }
3139    '''
3140    twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3141    twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3142
    # VMOVN: the per-element operation is a plain assignment.  Any narrowing
    # presumably comes from the element types twoRegNarrowMiscInst supplies
    # for srcElem1 and destElem -- that helper is defined outside this
    # section, so confirm there.
    vmovnCode = 'destElem = srcElem1;'
    twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3145
    # VDUP (register source): every destination element is assigned srcElem1.
    # How srcElem1 is chosen is up to twoRegMiscScInst, which is defined
    # outside this section.
    vdupCode = 'destElem = srcElem1;'
    # D and Q variants, instantiated for every type in smallUnsignedTypes.
    twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
    twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3149
3150    def vdupGprInst(name, Name, types, rCount):
3151        global header_output, exec_output
3152        eWalkCode = '''
3153        RegVect destReg;
3154        for (unsigned i = 0; i < eCount; i++) {
3155            destReg.elements[i] = htog((Element)Op1);
3156        }
3157        '''
3158        for reg in range(rCount):
3159            eWalkCode += '''
3160            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3161            ''' % { "reg" : reg }
3162        iop = InstObjParams(name, Name,
3163                            "RegRegOp",
3164                            { "code": eWalkCode,
3165                              "r_count": rCount,
3166                              "predicate_test": predicateTest }, [])
3167        header_output += NeonRegRegOpDeclare.subst(iop)
3168        exec_output += NeonEqualRegExecute.subst(iop)
3169        for type in types:
3170            substDict = { "targs" : type,
3171                          "class_name" : Name }
3172            exec_output += NeonExecDeclare.subst(substDict)
    # Instantiate VDUP from Op1 for 2-word (D) and 4-word (Q) destinations,
    # for every type in smallUnsignedTypes.
    vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
    vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3175
    # One-register immediate operations.  Each snippet is the per-element
    # statement handed to oneRegImmInst (defined outside this section); all of
    # them are instantiated for uint64_t only, in a D (2) and a Q (4) variant.

    # VMOV (immediate): overwrite the destination element with imm.
    vmovCode = 'destElem = imm;'
    oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
    oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)

    # VORR (immediate): OR imm into the existing destination element.
    # NOTE(review): the trailing True presumably tells oneRegImmInst that the
    # old destination value has to be read first -- confirm against the helper.
    vorrCode = 'destElem |= imm;'
    oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
    oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)

    # VMVN (immediate): overwrite the destination element with the bitwise
    # complement of imm.
    vmvnCode = 'destElem = ~imm;'
    oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
    oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)

    # VBIC (immediate): clear the bits of the destination element that are
    # set in imm.  Passes the same trailing True as vorr (see the note there).
    vbicCode = 'destElem &= ~imm;'
    oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
    oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3191
    # VQMOVN for the signed types: assign the source to the destination
    # element and check whether the value survives being widened back to
    # BigElement.  If it does not, set the FPSCR qc flag and saturate:
    # destElem becomes mask(sizeof(Element) * 8 - 1) for non-negative sources
    # and the bitwise complement of that for negative ones.
    # NOTE(review): mask(n) presumably yields n low-order one bits, which
    # would make these the largest and smallest signed Element values --
    # confirm against the mask() helper.
    vqmovnCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3204
    # Saturating narrow for the unsigned types (class NVqmovun): assign the
    # source to the destination element and check whether the value survives
    # being widened back to BigElement.  If it does not, set the FPSCR qc flag
    # and replace destElem with mask(sizeof(Element) * 8).
    # NOTE(review): mask(n) presumably yields n low-order one bits, i.e. the
    # largest unsigned Element value -- confirm against the mask() helper.
    vqmovunCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
            smallUnsignedTypes, vqmovunCode)
3216
    # Saturating narrow instantiated for the signed types (class NVqmovuns).
    # The result is treated as out of range when the source is negative or
    # when the low sizeof(Element) * 8 bits of the assigned value differ from
    # the source.  In that case the FPSCR qc flag is set and destElem becomes
    # mask(sizeof(Element) * 8), or the bitwise complement of that when the
    # source is negative.
    # NOTE(review): mask(n) presumably yields n low-order one bits, so the
    # two saturation values would be all ones and zero -- confirm against the
    # mask() helper.
    vqmovunsCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
            smallSignedTypes, vqmovunsCode)
3231
3232    def buildVext(name, Name, types, rCount, op):
3233        global header_output, exec_output
3234        eWalkCode = '''
3235        RegVect srcReg1, srcReg2, destReg;
3236        '''
3237        for reg in range(rCount):
3238            eWalkCode += '''
3239                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3240                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3241            ''' % { "reg" : reg }
3242        eWalkCode += op
3243        for reg in range(rCount):
3244            eWalkCode += '''
3245            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3246            ''' % { "reg" : reg }
3247        iop = InstObjParams(name, Name,
3248                            "RegRegRegImmOp",
3249                            { "code": eWalkCode,
3250                              "r_count": rCount,
3251                              "predicate_test": predicateTest }, [])
3252        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3253        exec_output += NeonEqualRegExecute.subst(iop)
3254        for type in types:
3255            substDict = { "targs" : type,
3256                          "class_name" : Name }
3257            exec_output += NeonExecDeclare.subst(substDict)
3258
    # VEXT: destination element i is taken from position i + imm of the two
    # sources laid end to end: srcReg1 while the index is below eCount,
    # otherwise srcReg2 at (index - eCount).  The assert catches an imm large
    # enough to run past the end of srcReg2.
    vextCode = '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned index = i + imm;
            if (index < eCount) {
                destReg.elements[i] = srcReg1.elements[index];
            } else {
                index -= eCount;
                assert(index < eCount);
                destReg.elements[i] = srcReg2.elements[index];
            }
        }
    '''
    # Only byte elements are instantiated, in a D (2-word) and a Q (4-word)
    # variant.
    buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
    buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3273
3274    def buildVtbxl(name, Name, length, isVtbl):
3275        global header_output, decoder_output, exec_output
3276        code = '''
3277            union
3278            {
3279                uint8_t bytes[32];
3280                FloatRegBits regs[8];
3281            } table;
3282
3283            union
3284            {
3285                uint8_t bytes[8];
3286                FloatRegBits regs[2];
3287            } destReg, srcReg2;
3288
3289            const unsigned length = %(length)d;
3290            const bool isVtbl = %(isVtbl)s;
3291
3292            srcReg2.regs[0] = htog(FpOp2P0.uw);
3293            srcReg2.regs[1] = htog(FpOp2P1.uw);
3294
3295            destReg.regs[0] = htog(FpDestP0.uw);
3296            destReg.regs[1] = htog(FpDestP1.uw);
3297        ''' % { "length" : length, "isVtbl" : isVtbl }
3298        for reg in range(8):
3299            if reg < length * 2:
3300                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3301                        { "reg" : reg }
3302            else:
3303                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3304        code += '''
3305        for (unsigned i = 0; i < sizeof(destReg); i++) {
3306            uint8_t index = srcReg2.bytes[i];
3307            if (index < 8 * length) {
3308                destReg.bytes[i] = table.bytes[index];
3309            } else {
3310                if (isVtbl)
3311                    destReg.bytes[i] = 0;
3312                // else destReg.bytes[i] unchanged
3313            }
3314        }
3315
3316        FpDestP0.uw = gtoh(destReg.regs[0]);
3317        FpDestP1.uw = gtoh(destReg.regs[1]);
3318        '''
3319        iop = InstObjParams(name, Name,
3320                            "RegRegRegOp",
3321                            { "code": code,
3322                              "predicate_test": predicateTest }, [])
3323        header_output += RegRegRegOpDeclare.subst(iop)
3324        decoder_output += RegRegRegOpConstructor.subst(iop)
3325        exec_output += PredOpExecute.subst(iop)
3326
    # VTBL with table lengths 1 to 4.  The "true" string becomes the isVtbl
    # initialiser in the generated C++, so out-of-range indices produce zero
    # bytes.
    buildVtbxl("vtbl", "NVtbl1", 1, "true")
    buildVtbxl("vtbl", "NVtbl2", 2, "true")
    buildVtbxl("vtbl", "NVtbl3", 3, "true")
    buildVtbxl("vtbl", "NVtbl4", 4, "true")

    # VTBX with table lengths 1 to 4.  With "false" the generated code leaves
    # the destination byte unchanged for out-of-range indices.
    buildVtbxl("vtbx", "NVtbx1", 1, "false")
    buildVtbxl("vtbx", "NVtbx2", 2, "false")
    buildVtbxl("vtbx", "NVtbx3", 3, "false")
    buildVtbxl("vtbx", "NVtbx4", 4, "false")
3336}};
3337