neon.isa revision 7639:8c09b7ff5b57
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest }, [])
687        header_output += NeonRegRegRegOpDeclare.subst(iop)
688        exec_output += NeonEqualRegExecute.subst(iop)
689        for type in types:
690            substDict = { "targs" : type,
691                          "class_name" : Name }
692            exec_output += NeonExecDeclare.subst(substDict)
693
694    def threeEqualRegInstFp(name, Name, types, rCount, op,
695                            readDest=False, pairwise=False, toInt=False):
696        global header_output, exec_output
697        eWalkCode = '''
698        typedef FloatReg FloatVect[rCount];
699        FloatVect srcRegs1, srcRegs2;
700        '''
701        if toInt:
702            eWalkCode += 'RegVect destRegs;\n'
703        else:
704            eWalkCode += 'FloatVect destRegs;\n'
705        for reg in range(rCount):
706            eWalkCode += '''
707                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
708                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
709            ''' % { "reg" : reg }
710            if readDest:
711                if toInt:
712                    eWalkCode += '''
713                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
714                    ''' % { "reg" : reg }
715                else:
716                    eWalkCode += '''
717                        destRegs[%(reg)d] = FpDestP%(reg)d;
718                    ''' % { "reg" : reg }
719        readDestCode = ''
720        if readDest:
721            readDestCode = 'destReg = destRegs[r];'
722        destType = 'FloatReg'
723        writeDest = 'destRegs[r] = destReg;'
724        if toInt:
725            destType = 'FloatRegBits'
726            writeDest = 'destRegs.regs[r] = destReg;'
727        if pairwise:
728            eWalkCode += '''
729            for (unsigned r = 0; r < rCount; r++) {
730                FloatReg srcReg1 = (2 * r < rCount) ?
731                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
732                FloatReg srcReg2 = (2 * r < rCount) ?
733                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
734                %(destType)s destReg;
735                %(readDest)s
736                %(op)s
737                %(writeDest)s
738            }
739            ''' % { "op" : op,
740                    "readDest" : readDestCode,
741                    "destType" : destType,
742                    "writeDest" : writeDest }
743        else:
744            eWalkCode += '''
745            for (unsigned r = 0; r < rCount; r++) {
746                FloatReg srcReg1 = srcRegs1[r];
747                FloatReg srcReg2 = srcRegs2[r];
748                %(destType)s destReg;
749                %(readDest)s
750                %(op)s
751                %(writeDest)s
752            }
753            ''' % { "op" : op,
754                    "readDest" : readDestCode,
755                    "destType" : destType,
756                    "writeDest" : writeDest }
757        for reg in range(rCount):
758            if toInt:
759                eWalkCode += '''
760                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
761                ''' % { "reg" : reg }
762            else:
763                eWalkCode += '''
764                FpDestP%(reg)d = destRegs[%(reg)d];
765                ''' % { "reg" : reg }
766        iop = InstObjParams(name, Name,
767                            "FpRegRegRegOp",
768                            { "code": eWalkCode,
769                              "r_count": rCount,
770                              "predicate_test": predicateTest }, [])
771        header_output += NeonRegRegRegOpDeclare.subst(iop)
772        exec_output += NeonEqualRegExecute.subst(iop)
773        for type in types:
774            substDict = { "targs" : type,
775                          "class_name" : Name }
776            exec_output += NeonExecDeclare.subst(substDict)
777
778    def threeUnequalRegInst(name, Name, types, op,
779                            bigSrc1, bigSrc2, bigDest, readDest):
780        global header_output, exec_output
781        src1Cnt = src2Cnt = destCnt = 2
782        src1Prefix = src2Prefix = destPrefix = ''
783        if bigSrc1:
784            src1Cnt = 4
785            src1Prefix = 'Big'
786        if bigSrc2:
787            src2Cnt = 4
788            src2Prefix = 'Big'
789        if bigDest:
790            destCnt = 4
791            destPrefix = 'Big'
792        eWalkCode = '''
793            %sRegVect srcReg1;
794            %sRegVect srcReg2;
795            %sRegVect destReg;
796        ''' % (src1Prefix, src2Prefix, destPrefix)
797        for reg in range(src1Cnt):
798            eWalkCode += '''
799                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
800            ''' % { "reg" : reg }
801        for reg in range(src2Cnt):
802            eWalkCode += '''
803                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
804            ''' % { "reg" : reg }
805        if readDest:
806            for reg in range(destCnt):
807                eWalkCode += '''
808                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
809                ''' % { "reg" : reg }
810        readDestCode = ''
811        if readDest:
812            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
813        eWalkCode += '''
814        for (unsigned i = 0; i < eCount; i++) {
815            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
816            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
817            %(destPrefix)sElement destElem;
818            %(readDest)s
819            %(op)s
820            destReg.elements[i] = htog(destElem);
821        }
822        ''' % { "op" : op, "readDest" : readDestCode,
823                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
824                "destPrefix" : destPrefix }
825        for reg in range(destCnt):
826            eWalkCode += '''
827            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
828            ''' % { "reg" : reg }
829        iop = InstObjParams(name, Name,
830                            "RegRegRegOp",
831                            { "code": eWalkCode,
832                              "r_count": 2,
833                              "predicate_test": predicateTest }, [])
834        header_output += NeonRegRegRegOpDeclare.subst(iop)
835        exec_output += NeonUnequalRegExecute.subst(iop)
836        for type in types:
837            substDict = { "targs" : type,
838                          "class_name" : Name }
839            exec_output += NeonExecDeclare.subst(substDict)
840
841    def threeRegNarrowInst(name, Name, types, op, readDest=False):
842        threeUnequalRegInst(name, Name, types, op,
843                            True, True, False, readDest)
844
845    def threeRegLongInst(name, Name, types, op, readDest=False):
846        threeUnequalRegInst(name, Name, types, op,
847                            False, False, True, readDest)
848
849    def threeRegWideInst(name, Name, types, op, readDest=False):
850        threeUnequalRegInst(name, Name, types, op,
851                            True, False, True, readDest)
852
853    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
854        global header_output, exec_output
855        eWalkCode = '''
856        RegVect srcReg1, srcReg2, destReg;
857        '''
858        for reg in range(rCount):
859            eWalkCode += '''
860                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
861                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
862            ''' % { "reg" : reg }
863            if readDest:
864                eWalkCode += '''
865                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
866                ''' % { "reg" : reg }
867        readDestCode = ''
868        if readDest:
869            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
870        eWalkCode += '''
871        assert(imm >= 0 && imm < eCount);
872        for (unsigned i = 0; i < eCount; i++) {
873            Element srcElem1 = gtoh(srcReg1.elements[i]);
874            Element srcElem2 = gtoh(srcReg2.elements[imm]);
875            Element destElem;
876            %(readDest)s
877            %(op)s
878            destReg.elements[i] = htog(destElem);
879        }
880        ''' % { "op" : op, "readDest" : readDestCode }
881        for reg in range(rCount):
882            eWalkCode += '''
883            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
884            ''' % { "reg" : reg }
885        iop = InstObjParams(name, Name,
886                            "RegRegRegImmOp",
887                            { "code": eWalkCode,
888                              "r_count": rCount,
889                              "predicate_test": predicateTest }, [])
890        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
891        exec_output += NeonEqualRegExecute.subst(iop)
892        for type in types:
893            substDict = { "targs" : type,
894                          "class_name" : Name }
895            exec_output += NeonExecDeclare.subst(substDict)
896
897    def twoRegLongInst(name, Name, types, op, readDest=False):
898        global header_output, exec_output
899        rCount = 2
900        eWalkCode = '''
901        RegVect srcReg1, srcReg2;
902        BigRegVect destReg;
903        '''
904        for reg in range(rCount):
905            eWalkCode += '''
906                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
907                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
908            ''' % { "reg" : reg }
909        if readDest:
910            for reg in range(2 * rCount):
911                eWalkCode += '''
912                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
913                ''' % { "reg" : reg }
914        readDestCode = ''
915        if readDest:
916            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
917        eWalkCode += '''
918        assert(imm >= 0 && imm < eCount);
919        for (unsigned i = 0; i < eCount; i++) {
920            Element srcElem1 = gtoh(srcReg1.elements[i]);
921            Element srcElem2 = gtoh(srcReg2.elements[imm]);
922            BigElement destElem;
923            %(readDest)s
924            %(op)s
925            destReg.elements[i] = htog(destElem);
926        }
927        ''' % { "op" : op, "readDest" : readDestCode }
928        for reg in range(2 * rCount):
929            eWalkCode += '''
930            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
931            ''' % { "reg" : reg }
932        iop = InstObjParams(name, Name,
933                            "RegRegRegImmOp",
934                            { "code": eWalkCode,
935                              "r_count": rCount,
936                              "predicate_test": predicateTest }, [])
937        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
938        exec_output += NeonUnequalRegExecute.subst(iop)
939        for type in types:
940            substDict = { "targs" : type,
941                          "class_name" : Name }
942            exec_output += NeonExecDeclare.subst(substDict)
943
944    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
945        global header_output, exec_output
946        eWalkCode = '''
947        typedef FloatReg FloatVect[rCount];
948        FloatVect srcRegs1, srcRegs2, destRegs;
949        '''
950        for reg in range(rCount):
951            eWalkCode += '''
952                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
953                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
954            ''' % { "reg" : reg }
955            if readDest:
956                eWalkCode += '''
957                    destRegs[%(reg)d] = FpDestP%(reg)d;
958                ''' % { "reg" : reg }
959        readDestCode = ''
960        if readDest:
961            readDestCode = 'destReg = destRegs[i];'
962        eWalkCode += '''
963        assert(imm >= 0 && imm < rCount);
964        for (unsigned i = 0; i < rCount; i++) {
965            FloatReg srcReg1 = srcRegs1[i];
966            FloatReg srcReg2 = srcRegs2[imm];
967            FloatReg destReg;
968            %(readDest)s
969            %(op)s
970            destRegs[i] = destReg;
971        }
972        ''' % { "op" : op, "readDest" : readDestCode }
973        for reg in range(rCount):
974            eWalkCode += '''
975            FpDestP%(reg)d = destRegs[%(reg)d];
976            ''' % { "reg" : reg }
977        iop = InstObjParams(name, Name,
978                            "FpRegRegRegImmOp",
979                            { "code": eWalkCode,
980                              "r_count": rCount,
981                              "predicate_test": predicateTest }, [])
982        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
983        exec_output += NeonEqualRegExecute.subst(iop)
984        for type in types:
985            substDict = { "targs" : type,
986                          "class_name" : Name }
987            exec_output += NeonExecDeclare.subst(substDict)
988
989    def twoRegShiftInst(name, Name, types, rCount, op,
990            readDest=False, toInt=False, fromInt=False):
991        global header_output, exec_output
992        eWalkCode = '''
993        RegVect srcRegs1, destRegs;
994        '''
995        for reg in range(rCount):
996            eWalkCode += '''
997                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
998            ''' % { "reg" : reg }
999            if readDest:
1000                eWalkCode += '''
1001                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1002                ''' % { "reg" : reg }
1003        readDestCode = ''
1004        if readDest:
1005            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1006            if toInt:
1007                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1008        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1009        if fromInt:
1010            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1011        declDest = 'Element destElem;'
1012        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1013        if toInt:
1014            declDest = 'FloatRegBits destReg;'
1015            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1016        eWalkCode += '''
1017        for (unsigned i = 0; i < eCount; i++) {
1018            %(readOp)s
1019            %(declDest)s
1020            %(readDest)s
1021            %(op)s
1022            %(writeDest)s
1023        }
1024        ''' % { "readOp" : readOpCode,
1025                "declDest" : declDest,
1026                "readDest" : readDestCode,
1027                "op" : op,
1028                "writeDest" : writeDestCode }
1029        for reg in range(rCount):
1030            eWalkCode += '''
1031            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1032            ''' % { "reg" : reg }
1033        iop = InstObjParams(name, Name,
1034                            "RegRegImmOp",
1035                            { "code": eWalkCode,
1036                              "r_count": rCount,
1037                              "predicate_test": predicateTest }, [])
1038        header_output += NeonRegRegImmOpDeclare.subst(iop)
1039        exec_output += NeonEqualRegExecute.subst(iop)
1040        for type in types:
1041            substDict = { "targs" : type,
1042                          "class_name" : Name }
1043            exec_output += NeonExecDeclare.subst(substDict)
1044
1045    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1046        global header_output, exec_output
1047        eWalkCode = '''
1048        BigRegVect srcReg1;
1049        RegVect destReg;
1050        '''
1051        for reg in range(4):
1052            eWalkCode += '''
1053                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1054            ''' % { "reg" : reg }
1055        if readDest:
1056            for reg in range(2):
1057                eWalkCode += '''
1058                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1059                ''' % { "reg" : reg }
1060        readDestCode = ''
1061        if readDest:
1062            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1063        eWalkCode += '''
1064        for (unsigned i = 0; i < eCount; i++) {
1065            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1066            Element destElem;
1067            %(readDest)s
1068            %(op)s
1069            destReg.elements[i] = htog(destElem);
1070        }
1071        ''' % { "op" : op, "readDest" : readDestCode }
1072        for reg in range(2):
1073            eWalkCode += '''
1074            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1075            ''' % { "reg" : reg }
1076        iop = InstObjParams(name, Name,
1077                            "RegRegImmOp",
1078                            { "code": eWalkCode,
1079                              "r_count": 2,
1080                              "predicate_test": predicateTest }, [])
1081        header_output += NeonRegRegImmOpDeclare.subst(iop)
1082        exec_output += NeonUnequalRegExecute.subst(iop)
1083        for type in types:
1084            substDict = { "targs" : type,
1085                          "class_name" : Name }
1086            exec_output += NeonExecDeclare.subst(substDict)
1087
1088    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1089        global header_output, exec_output
1090        eWalkCode = '''
1091        RegVect srcReg1;
1092        BigRegVect destReg;
1093        '''
1094        for reg in range(2):
1095            eWalkCode += '''
1096                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1097            ''' % { "reg" : reg }
1098        if readDest:
1099            for reg in range(4):
1100                eWalkCode += '''
1101                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1102                ''' % { "reg" : reg }
1103        readDestCode = ''
1104        if readDest:
1105            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1106        eWalkCode += '''
1107        for (unsigned i = 0; i < eCount; i++) {
1108            Element srcElem1 = gtoh(srcReg1.elements[i]);
1109            BigElement destElem;
1110            %(readDest)s
1111            %(op)s
1112            destReg.elements[i] = htog(destElem);
1113        }
1114        ''' % { "op" : op, "readDest" : readDestCode }
1115        for reg in range(4):
1116            eWalkCode += '''
1117            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1118            ''' % { "reg" : reg }
1119        iop = InstObjParams(name, Name,
1120                            "RegRegImmOp",
1121                            { "code": eWalkCode,
1122                              "r_count": 2,
1123                              "predicate_test": predicateTest }, [])
1124        header_output += NeonRegRegImmOpDeclare.subst(iop)
1125        exec_output += NeonUnequalRegExecute.subst(iop)
1126        for type in types:
1127            substDict = { "targs" : type,
1128                          "class_name" : Name }
1129            exec_output += NeonExecDeclare.subst(substDict)
1130
1131    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1132        global header_output, exec_output
1133        eWalkCode = '''
1134        RegVect srcReg1, destReg;
1135        '''
1136        for reg in range(rCount):
1137            eWalkCode += '''
1138                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1139            ''' % { "reg" : reg }
1140            if readDest:
1141                eWalkCode += '''
1142                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1143                ''' % { "reg" : reg }
1144        readDestCode = ''
1145        if readDest:
1146            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1147        eWalkCode += '''
1148        for (unsigned i = 0; i < eCount; i++) {
1149            unsigned j = i;
1150            Element srcElem1 = gtoh(srcReg1.elements[i]);
1151            Element destElem;
1152            %(readDest)s
1153            %(op)s
1154            destReg.elements[j] = htog(destElem);
1155        }
1156        ''' % { "op" : op, "readDest" : readDestCode }
1157        for reg in range(rCount):
1158            eWalkCode += '''
1159            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1160            ''' % { "reg" : reg }
1161        iop = InstObjParams(name, Name,
1162                            "RegRegOp",
1163                            { "code": eWalkCode,
1164                              "r_count": rCount,
1165                              "predicate_test": predicateTest }, [])
1166        header_output += NeonRegRegOpDeclare.subst(iop)
1167        exec_output += NeonEqualRegExecute.subst(iop)
1168        for type in types:
1169            substDict = { "targs" : type,
1170                          "class_name" : Name }
1171            exec_output += NeonExecDeclare.subst(substDict)
1172
1173    def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1174        global header_output, exec_output
1175        eWalkCode = '''
1176        RegVect srcReg1, destReg;
1177        '''
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1181            ''' % { "reg" : reg }
1182            if readDest:
1183                eWalkCode += '''
1184                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1185                ''' % { "reg" : reg }
1186        readDestCode = ''
1187        if readDest:
1188            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1189        eWalkCode += '''
1190        for (unsigned i = 0; i < eCount; i++) {
1191            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1192            Element destElem;
1193            %(readDest)s
1194            %(op)s
1195            destReg.elements[i] = htog(destElem);
1196        }
1197        ''' % { "op" : op, "readDest" : readDestCode }
1198        for reg in range(rCount):
1199            eWalkCode += '''
1200            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1201            ''' % { "reg" : reg }
1202        iop = InstObjParams(name, Name,
1203                            "RegRegImmOp",
1204                            { "code": eWalkCode,
1205                              "r_count": rCount,
1206                              "predicate_test": predicateTest }, [])
1207        header_output += NeonRegRegImmOpDeclare.subst(iop)
1208        exec_output += NeonEqualRegExecute.subst(iop)
1209        for type in types:
1210            substDict = { "targs" : type,
1211                          "class_name" : Name }
1212            exec_output += NeonExecDeclare.subst(substDict)
1213
1214    def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1215        global header_output, exec_output
1216        eWalkCode = '''
1217        RegVect srcReg1, destReg;
1218        '''
1219        for reg in range(rCount):
1220            eWalkCode += '''
1221                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1222                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1223            ''' % { "reg" : reg }
1224            if readDest:
1225                eWalkCode += '''
1226                ''' % { "reg" : reg }
1227        readDestCode = ''
1228        if readDest:
1229            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1230        eWalkCode += op
1231        for reg in range(rCount):
1232            eWalkCode += '''
1233            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1234            FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1235            ''' % { "reg" : reg }
1236        iop = InstObjParams(name, Name,
1237                            "RegRegOp",
1238                            { "code": eWalkCode,
1239                              "r_count": rCount,
1240                              "predicate_test": predicateTest }, [])
1241        header_output += NeonRegRegOpDeclare.subst(iop)
1242        exec_output += NeonEqualRegExecute.subst(iop)
1243        for type in types:
1244            substDict = { "targs" : type,
1245                          "class_name" : Name }
1246            exec_output += NeonExecDeclare.subst(substDict)
1247
1248    def twoRegMiscInstFp(name, Name, types, rCount, op,
1249            readDest=False, toInt=False):
1250        global header_output, exec_output
1251        eWalkCode = '''
1252        typedef FloatReg FloatVect[rCount];
1253        FloatVect srcRegs1;
1254        '''
1255        if toInt:
1256            eWalkCode += 'RegVect destRegs;\n'
1257        else:
1258            eWalkCode += 'FloatVect destRegs;\n'
1259        for reg in range(rCount):
1260            eWalkCode += '''
1261                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1262            ''' % { "reg" : reg }
1263            if readDest:
1264                if toInt:
1265                    eWalkCode += '''
1266                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1267                    ''' % { "reg" : reg }
1268                else:
1269                    eWalkCode += '''
1270                        destRegs[%(reg)d] = FpDestP%(reg)d;
1271                    ''' % { "reg" : reg }
1272        readDestCode = ''
1273        if readDest:
1274            readDestCode = 'destReg = destRegs[i];'
1275        destType = 'FloatReg'
1276        writeDest = 'destRegs[r] = destReg;'
1277        if toInt:
1278            destType = 'FloatRegBits'
1279            writeDest = 'destRegs.regs[r] = destReg;'
1280        eWalkCode += '''
1281        for (unsigned r = 0; r < rCount; r++) {
1282            FloatReg srcReg1 = srcRegs1[r];
1283            %(destType)s destReg;
1284            %(readDest)s
1285            %(op)s
1286            %(writeDest)s
1287        }
1288        ''' % { "op" : op,
1289                "readDest" : readDestCode,
1290                "destType" : destType,
1291                "writeDest" : writeDest }
1292        for reg in range(rCount):
1293            if toInt:
1294                eWalkCode += '''
1295                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1296                ''' % { "reg" : reg }
1297            else:
1298                eWalkCode += '''
1299                FpDestP%(reg)d = destRegs[%(reg)d];
1300                ''' % { "reg" : reg }
1301        iop = InstObjParams(name, Name,
1302                            "FpRegRegOp",
1303                            { "code": eWalkCode,
1304                              "r_count": rCount,
1305                              "predicate_test": predicateTest }, [])
1306        header_output += NeonRegRegOpDeclare.subst(iop)
1307        exec_output += NeonEqualRegExecute.subst(iop)
1308        for type in types:
1309            substDict = { "targs" : type,
1310                          "class_name" : Name }
1311            exec_output += NeonExecDeclare.subst(substDict)
1312
1313    def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1314        global header_output, exec_output
1315        eWalkCode = '''
1316        RegVect srcRegs;
1317        BigRegVect destReg;
1318        '''
1319        for reg in range(rCount):
1320            eWalkCode += '''
1321                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1322            ''' % { "reg" : reg }
1323            if readDest:
1324                eWalkCode += '''
1325                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1326                ''' % { "reg" : reg }
1327        readDestCode = ''
1328        if readDest:
1329            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1330        eWalkCode += '''
1331        for (unsigned i = 0; i < eCount / 2; i++) {
1332            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1333            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1334            BigElement destElem;
1335            %(readDest)s
1336            %(op)s
1337            destReg.elements[i] = htog(destElem);
1338        }
1339        ''' % { "op" : op, "readDest" : readDestCode }
1340        for reg in range(rCount):
1341            eWalkCode += '''
1342            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1343            ''' % { "reg" : reg }
1344        iop = InstObjParams(name, Name,
1345                            "RegRegOp",
1346                            { "code": eWalkCode,
1347                              "r_count": rCount,
1348                              "predicate_test": predicateTest }, [])
1349        header_output += NeonRegRegOpDeclare.subst(iop)
1350        exec_output += NeonUnequalRegExecute.subst(iop)
1351        for type in types:
1352            substDict = { "targs" : type,
1353                          "class_name" : Name }
1354            exec_output += NeonExecDeclare.subst(substDict)
1355
1356    def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = '''
1359        BigRegVect srcReg1;
1360        RegVect destReg;
1361        '''
1362        for reg in range(4):
1363            eWalkCode += '''
1364                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1365            ''' % { "reg" : reg }
1366        if readDest:
1367            for reg in range(2):
1368                eWalkCode += '''
1369                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1370                ''' % { "reg" : reg }
1371        readDestCode = ''
1372        if readDest:
1373            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1374        eWalkCode += '''
1375        for (unsigned i = 0; i < eCount; i++) {
1376            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1377            Element destElem;
1378            %(readDest)s
1379            %(op)s
1380            destReg.elements[i] = htog(destElem);
1381        }
1382        ''' % { "op" : op, "readDest" : readDestCode }
1383        for reg in range(2):
1384            eWalkCode += '''
1385            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1386            ''' % { "reg" : reg }
1387        iop = InstObjParams(name, Name,
1388                            "RegRegOp",
1389                            { "code": eWalkCode,
1390                              "r_count": 2,
1391                              "predicate_test": predicateTest }, [])
1392        header_output += NeonRegRegOpDeclare.subst(iop)
1393        exec_output += NeonUnequalRegExecute.subst(iop)
1394        for type in types:
1395            substDict = { "targs" : type,
1396                          "class_name" : Name }
1397            exec_output += NeonExecDeclare.subst(substDict)
1398
1399    def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1400        global header_output, exec_output
1401        eWalkCode = '''
1402        RegVect destReg;
1403        '''
1404        if readDest:
1405            for reg in range(rCount):
1406                eWalkCode += '''
1407                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1408                ''' % { "reg" : reg }
1409        readDestCode = ''
1410        if readDest:
1411            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1412        eWalkCode += '''
1413        for (unsigned i = 0; i < eCount; i++) {
1414            Element destElem;
1415            %(readDest)s
1416            %(op)s
1417            destReg.elements[i] = htog(destElem);
1418        }
1419        ''' % { "op" : op, "readDest" : readDestCode }
1420        for reg in range(rCount):
1421            eWalkCode += '''
1422            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1423            ''' % { "reg" : reg }
1424        iop = InstObjParams(name, Name,
1425                            "RegImmOp",
1426                            { "code": eWalkCode,
1427                              "r_count": rCount,
1428                              "predicate_test": predicateTest }, [])
1429        header_output += NeonRegImmOpDeclare.subst(iop)
1430        exec_output += NeonEqualRegExecute.subst(iop)
1431        for type in types:
1432            substDict = { "targs" : type,
1433                          "class_name" : Name }
1434            exec_output += NeonExecDeclare.subst(substDict)
1435
1436    def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1437        global header_output, exec_output
1438        eWalkCode = '''
1439        RegVect srcReg1;
1440        BigRegVect destReg;
1441        '''
1442        for reg in range(2):
1443            eWalkCode += '''
1444                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1445            ''' % { "reg" : reg }
1446        if readDest:
1447            for reg in range(4):
1448                eWalkCode += '''
1449                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1450                ''' % { "reg" : reg }
1451        readDestCode = ''
1452        if readDest:
1453            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1454        eWalkCode += '''
1455        for (unsigned i = 0; i < eCount; i++) {
1456            Element srcElem1 = gtoh(srcReg1.elements[i]);
1457            BigElement destElem;
1458            %(readDest)s
1459            %(op)s
1460            destReg.elements[i] = htog(destElem);
1461        }
1462        ''' % { "op" : op, "readDest" : readDestCode }
1463        for reg in range(4):
1464            eWalkCode += '''
1465            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1466            ''' % { "reg" : reg }
1467        iop = InstObjParams(name, Name,
1468                            "RegRegOp",
1469                            { "code": eWalkCode,
1470                              "r_count": 2,
1471                              "predicate_test": predicateTest }, [])
1472        header_output += NeonRegRegOpDeclare.subst(iop)
1473        exec_output += NeonUnequalRegExecute.subst(iop)
1474        for type in types:
1475            substDict = { "targs" : type,
1476                          "class_name" : Name }
1477            exec_output += NeonExecDeclare.subst(substDict)
1478
    # Halving add (vhadd): (srcElem1 + srcElem2) / 2 computed at element
    # width.  Each operand is halved with its low bit cleared, and carryBit
    # puts back the carry produced when both discarded low bits were set.
    # Registered twice, passing 2 for the D class and 4 for the Q class
    # (presumably the register count; threeEqualRegInst is defined earlier
    # in the file).
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)

    # Rounding halving add (vrhadd): same as vhadd except that 1 is added to
    # the sum of the two low bits, so carryBit is set when either low bit
    # is set.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)

    # Halving subtract (vhsub): (srcElem1 - srcElem2) / 2 computed at element
    # width.  "barrowBit" (sic, the borrow) is 1 when the low bit of
    # srcElem1 is clear and the low bit of srcElem2 is set.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1516
    # Bitwise operations.  They are instantiated for unsignedTypes only and
    # each one is registered in a D form (2) and a Q form (4).

    # vand: bitwise AND.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)

    # vbic: AND of the first operand with the complement of the second.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)

    # vorr: bitwise OR.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)

    # The register form of vmov reuses the OR snippet; presumably it is
    # decoded when both source operands name the same register -- confirm
    # against the decoder.
    threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)

    # vorn: OR of the first operand with the complement of the second.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)

    # veor: bitwise exclusive OR.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)

    # The three bitwise selects below read the old destElem, so they pass an
    # extra True (presumably readDest) to threeEqualRegInst.

    # vbif: keep destination bits where srcElem2 is 1, take srcElem1 bits
    # where srcElem2 is 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
    # vbit: take srcElem1 bits where srcElem2 is 1, keep destination bits
    # where srcElem2 is 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destination is the mask; take srcElem1 bits where it is
    # 1 and srcElem2 bits where it is 0.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1565
1566    vmaxCode = '''
1567        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1568    '''
    # Elementwise maximum (srcElem2 is chosen on ties). Registered as "vmax"
    # over allTypes, classes VmaxD (2) and VmaxQ (4).
1569    threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1570    threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1571
1572    vminCode = '''
1573        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1574    '''
    # Elementwise minimum (srcElem2 is chosen on ties). Registered as "vmin"
    # over allTypes, classes VminD (2) and VminQ (4).
1575    threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1576    threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1577
1578    vaddCode = '''
1579        destElem = srcElem1 + srcElem2;
1580    '''
    # Plain elementwise addition in the element type (no saturation check).
    # Registered as "vadd" over unsignedTypes, classes NVaddD (2) and
    # NVaddQ (4).
1581    threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1582    threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1583
1584    threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1585                      2, vaddCode, pairwise=True)
    # Both "vpadd" classes reuse the addition snippet (vaddCode) and pass
    # pairwise=True; how operands are paired is decided inside
    # threeEqualRegInst, which is not visible here.
1586    threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1587                      4, vaddCode, pairwise=True)
1588    vaddlwCode = '''
1589        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1590    '''
    # Addition performed after casting both operands to BigElement. The same
    # snippet serves "vaddl" (class Vaddl, via threeRegLongInst) and "vaddw"
    # (class Vaddw, via threeRegWideInst), both over smallTypes.
1591    threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1592    threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
1593    vaddhnCode = '''
1594        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1595                   (sizeof(Element) * 8);
1596    '''
    # Add in BigElement, then shift right by the bit width of Element so only
    # the bits above the low Element-sized part remain. Registered as
    # "vaddhn" (class Vaddhn) via threeRegNarrowInst over smallTypes.
1597    threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
1598    vraddhnCode = '''
1599        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1600                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1601                   (sizeof(Element) * 8);
1602    '''
    # Same as the add-and-shift above, but 1 << (Element bits - 1) is added
    # before the shift, i.e. half of the discarded range, which rounds the
    # result instead of truncating it. Registered as "vraddhn" (class
    # Vraddhn) via threeRegNarrowInst over smallTypes.
1603    threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1604
1605    vsubCode = '''
1606        destElem = srcElem1 - srcElem2;
1607    '''
    # Plain elementwise subtraction in the element type (no saturation
    # check). Registered as "vsub" over unsignedTypes, classes NVsubD (2) and
    # NVsubQ (4).
1608    threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1609    threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
1610    vsublwCode = '''
1611        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1612    '''
    # Subtraction performed after casting both operands to BigElement. The
    # same snippet serves "vsubl" (class Vsubl, via threeRegLongInst) and
    # "vsubw" (class Vsubw, via threeRegWideInst), both over smallTypes.
1613    threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1614    threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1615
1616    vqaddUCode = '''
1617        destElem = srcElem1 + srcElem2;
1618        FPSCR fpscr = (FPSCR)Fpscr;
1619        if (destElem < srcElem1 || destElem < srcElem2) {
1620            destElem = (Element)(-1);
1621            fpscr.qc = 1;
1622        }
1623        Fpscr = fpscr;
1624    '''
    # Saturating add for unsigned elements: a sum smaller than either operand
    # means the addition wrapped, so the result is forced to (Element)(-1)
    # (all ones) and fpscr.qc is set. Registered as "vqadd" over
    # unsignedTypes, classes VqaddUD (2) and VqaddUQ (4).
1625    threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1626    threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
1627    vsubhnCode = '''
1628        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1629                   (sizeof(Element) * 8);
1630    '''
    # Subtract in BigElement, then shift right by the bit width of Element so
    # only the bits above the low Element-sized part remain. Registered as
    # "vsubhn" (class Vsubhn) via threeRegNarrowInst over smallTypes.
1631    threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
1632    vrsubhnCode = '''
1633        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1634                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1635                   (sizeof(Element) * 8);
1636    '''
    # Same as the subtract-and-shift above, but 1 << (Element bits - 1) is
    # added before the shift so the result is rounded instead of truncated.
    # Registered as "vrsubhn" (class Vrsubhn) via threeRegNarrowInst over
    # smallTypes.
1637    threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1638
1639    vqaddSCode = '''
1640        destElem = srcElem1 + srcElem2;
1641        FPSCR fpscr = (FPSCR)Fpscr;
1642        bool negDest = (destElem < 0);
1643        bool negSrc1 = (srcElem1 < 0);
1644        bool negSrc2 = (srcElem2 < 0);
1645        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1646            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1647            if (negDest)
1648                destElem -= 1;
1649            fpscr.qc = 1;
1650        }
1651        Fpscr = fpscr;
1652    '''
    # Saturating add for signed elements. Overflow is detected when both
    # operands share a sign and the wrapped sum has the other sign. The
    # result is then set to the value with only the top bit set; if the
    # wrapped sum was negative (both operands non-negative) 1 is subtracted
    # to give the largest positive value instead. fpscr.qc is set on
    # saturation. Registered as "vqadd" over signedTypes, classes VqaddSD (2)
    # and VqaddSQ (4).
1653    threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1654    threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1655
1656    vqsubUCode = '''
1657        destElem = srcElem1 - srcElem2;
1658        FPSCR fpscr = (FPSCR)Fpscr;
1659        if (destElem > srcElem1) {
1660            destElem = 0;
1661            fpscr.qc = 1;
1662        }
1663        Fpscr = fpscr;
1664    '''
    # Saturating subtract for unsigned elements: a difference larger than the
    # minuend means the subtraction wrapped, so the result is clamped to 0
    # and fpscr.qc is set. Registered as "vqsub" over unsignedTypes, classes
    # VqsubUD (2) and VqsubUQ (4).
1665    threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1666    threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1667
1668    vqsubSCode = '''
1669        destElem = srcElem1 - srcElem2;
1670        FPSCR fpscr = (FPSCR)Fpscr;
1671        bool negDest = (destElem < 0);
1672        bool negSrc1 = (srcElem1 < 0);
1673        bool posSrc2 = (srcElem2 >= 0);
1674        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1675            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1676            if (negDest)
1677                destElem -= 1;
1678            fpscr.qc = 1;
1679        }
1680        Fpscr = fpscr;
1681    '''
    # Saturating subtract for signed elements. Overflow is detected when the
    # operands have opposite signs (negSrc1 == posSrc2) and the wrapped
    # difference does not have the sign of srcElem1. The result is then set
    # to the value with only the top bit set, or that value minus 1 when the
    # wrapped difference was negative. fpscr.qc is set on saturation.
    # Registered as "vqsub" over signedTypes, classes VqsubSD (2) and
    # VqsubSQ (4).
1682    threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1683    threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1684
1685    vcgtCode = '''
1686        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1687    '''
    # Compare greater-than: the result element is (Element)(-1) (all ones)
    # when srcElem1 > srcElem2 and 0 otherwise. Registered as "vcgt" over
    # allTypes, classes VcgtD (2) and VcgtQ (4).
1688    threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1689    threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1690
1691    vcgeCode = '''
1692        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1693    '''
    # Compare greater-than-or-equal: the result element is (Element)(-1)
    # (all ones) when srcElem1 >= srcElem2 and 0 otherwise. Registered as
    # "vcge" over allTypes, classes VcgeD (2) and VcgeQ (4).
1694    threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1695    threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1696
1697    vceqCode = '''
1698        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1699    '''
    # Compare equal: the result element is (Element)(-1) (all ones) when the
    # sources are equal and 0 otherwise. Registered as "vceq" over
    # unsignedTypes, classes VceqD (2) and VceqQ (4).
1700    threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1701    threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1702
1703    vshlCode = '''
1704        int16_t shiftAmt = (int8_t)srcElem2;
1705        if (shiftAmt < 0) {
1706            shiftAmt = -shiftAmt;
1707            if (shiftAmt >= sizeof(Element) * 8) {
1708                shiftAmt = sizeof(Element) * 8 - 1;
1709                destElem = 0;
1710            } else {
1711                destElem = (srcElem1 >> shiftAmt);
1712            }
1713            // Make sure the right shift sign extended when it should.
1714            if (srcElem1 < 0 && destElem >= 0) {
1715                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1716                                             1 - shiftAmt));
1717            }
1718        } else {
1719            if (shiftAmt >= sizeof(Element) * 8) {
1720                destElem = 0;
1721            } else {
1722                destElem = srcElem1 << shiftAmt;
1723            }
1724        }
1725    '''
    # Register-controlled shift. The shift amount is srcElem2 truncated to a
    # signed 8-bit value and held in an int16_t so negating -128 cannot
    # overflow. A negative amount shifts right by its magnitude, a
    # non-negative amount shifts left. Amounts of at least the element width
    # give 0; for a right shift the amount is also clamped to width - 1 so
    # the sign fix-up that follows fills every bit of a negative source.
    # Registered as "vshl" over allTypes, classes VshlD (2) and VshlQ (4).
1726    threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1727    threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1728
1729    vrshlCode = '''
1730        int16_t shiftAmt = (int8_t)srcElem2;
1731        if (shiftAmt < 0) {
1732            shiftAmt = -shiftAmt;
1733            Element rBit = 0;
1734            if (shiftAmt <= sizeof(Element) * 8)
1735                rBit = bits(srcElem1, shiftAmt - 1);
1736            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1737                rBit = 1;
1738            if (shiftAmt >= sizeof(Element) * 8) {
1739                shiftAmt = sizeof(Element) * 8 - 1;
1740                destElem = 0;
1741            } else {
1742                destElem = (srcElem1 >> shiftAmt);
1743            }
1744            // Make sure the right shift sign extended when it should.
1745            if (srcElem1 < 0 && destElem >= 0) {
1746                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1747                                             1 - shiftAmt));
1748            }
1749            destElem += rBit;
1750        } else if (shiftAmt > 0) {
1751            if (shiftAmt >= sizeof(Element) * 8) {
1752                destElem = 0;
1753            } else {
1754                destElem = srcElem1 << shiftAmt;
1755            }
1756        } else {
1757            destElem = srcElem1;
1758        }
1759    '''
    # Rounding register-controlled shift. The shift amount is srcElem2
    # truncated to a signed 8-bit value. For a right shift (negative amount)
    # rBit holds the last bit shifted out and is added to the shifted value
    # to round it. When the amount exceeds the element width, rBit is forced
    # to 1 for negative sources so that the all-ones sign fill plus rBit
    # yields 0. Positive amounts shift left (0 once the amount reaches the
    # element width) and a zero amount copies the source. Registered as
    # "vrshl" over allTypes, classes VrshlD (2) and VrshlQ (4).
1760    threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1761    threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1762
1763    vqshlUCode = '''
1764        int16_t shiftAmt = (int8_t)srcElem2;
1765        FPSCR fpscr = (FPSCR)Fpscr;
1766        if (shiftAmt < 0) {
1767            shiftAmt = -shiftAmt;
1768            if (shiftAmt >= sizeof(Element) * 8) {
1769                shiftAmt = sizeof(Element) * 8 - 1;
1770                destElem = 0;
1771            } else {
1772                destElem = (srcElem1 >> shiftAmt);
1773            }
1774            // Make sure the right shift sign extended when it should.
1775            if (srcElem1 < 0 && destElem >= 0) {
1776                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1777                                             1 - shiftAmt));
1778            }
1779        } else if (shiftAmt > 0) {
1780            if (shiftAmt >= sizeof(Element) * 8) {
1781                if (srcElem1 != 0) {
1782                    destElem = mask(sizeof(Element) * 8);
1783                    fpscr.qc = 1;
1784                } else {
1785                    destElem = 0;
1786                }
1787            } else {
1788                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1789                            sizeof(Element) * 8 - shiftAmt)) {
1790                    destElem = mask(sizeof(Element) * 8);
1791                    fpscr.qc = 1;
1792                } else {
1793                    destElem = srcElem1 << shiftAmt;
1794                }
1795            }
1796        } else {
1797            destElem = srcElem1;
1798        }
1799        Fpscr = fpscr;
1800    '''
    # Saturating register-controlled shift, unsigned flavour. Negative
    # amounts shift right exactly as in the plain shift snippet. For a
    # positive (left) shift the result saturates to mask(element width) and
    # fpscr.qc is set whenever non-zero bits would be lost: either the amount
    # is at least the element width and the source is non-zero, or any of the
    # top shiftAmt bits of the source are set. A zero amount copies the
    # source. Registered as "vqshl" over unsignedTypes, classes VqshlUD (2)
    # and VqshlUQ (4).
1801    threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1802    threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1803
1804    vqshlSCode = '''
1805        int16_t shiftAmt = (int8_t)srcElem2;
1806        FPSCR fpscr = (FPSCR)Fpscr;
1807        if (shiftAmt < 0) {
1808            shiftAmt = -shiftAmt;
1809            if (shiftAmt >= sizeof(Element) * 8) {
1810                shiftAmt = sizeof(Element) * 8 - 1;
1811                destElem = 0;
1812            } else {
1813                destElem = (srcElem1 >> shiftAmt);
1814            }
1815            // Make sure the right shift sign extended when it should.
1816            if (srcElem1 < 0 && destElem >= 0) {
1817                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1818                                             1 - shiftAmt));
1819            }
1820        } else if (shiftAmt > 0) {
1821            bool sat = false;
1822            if (shiftAmt >= sizeof(Element) * 8) {
1823                if (srcElem1 != 0)
1824                    sat = true;
1825                else
1826                    destElem = 0;
1827            } else {
1828                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1829                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1830                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1831                    sat = true;
1832                } else {
1833                    destElem = srcElem1 << shiftAmt;
1834                }
1835            }
1836            if (sat) {
1837                fpscr.qc = 1;
1838                destElem = mask(sizeof(Element) * 8 - 1);
1839                if (srcElem1 < 0)
1840                    destElem = ~destElem;
1841            }
1842        } else {
1843            destElem = srcElem1;
1844        }
1845        Fpscr = fpscr;
1846    '''
    # Saturating register-controlled shift, signed flavour. Negative amounts
    # shift right with sign fill. A left shift is lossless only if the top
    # shiftAmt + 1 bits of the source are all copies of the sign (all ones
    # for a negative source, all zero otherwise). If they are not, or the
    # amount is at least the element width with a non-zero source, the
    # result saturates to mask(width - 1) for non-negative sources or its
    # complement for negative ones, and fpscr.qc is set. A zero amount
    # copies the source. Registered as "vqshl" over signedTypes, classes
    # VqshlSD (2) and VqshlSQ (4).
1847    threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1848    threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1849
1850    vqrshlUCode = '''
1851        int16_t shiftAmt = (int8_t)srcElem2;
1852        FPSCR fpscr = (FPSCR)Fpscr;
1853        if (shiftAmt < 0) {
1854            shiftAmt = -shiftAmt;
1855            Element rBit = 0;
1856            if (shiftAmt <= sizeof(Element) * 8)
1857                rBit = bits(srcElem1, shiftAmt - 1);
1858            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1859                rBit = 1;
1860            if (shiftAmt >= sizeof(Element) * 8) {
1861                shiftAmt = sizeof(Element) * 8 - 1;
1862                destElem = 0;
1863            } else {
1864                destElem = (srcElem1 >> shiftAmt);
1865            }
1866            // Make sure the right shift sign extended when it should.
1867            if (srcElem1 < 0 && destElem >= 0) {
1868                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1869                                             1 - shiftAmt));
1870            }
1871            destElem += rBit;
1872        } else {
1873            if (shiftAmt >= sizeof(Element) * 8) {
1874                if (srcElem1 != 0) {
1875                    destElem = mask(sizeof(Element) * 8);
1876                    fpscr.qc = 1;
1877                } else {
1878                    destElem = 0;
1879                }
1880            } else {
1881                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1882                            sizeof(Element) * 8 - shiftAmt)) {
1883                    destElem = mask(sizeof(Element) * 8);
1884                    fpscr.qc = 1;
1885                } else {
1886                    destElem = srcElem1 << shiftAmt;
1887                }
1888            }
1889        }
1890        Fpscr = fpscr;
1891    '''
1892    threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1893    threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1894
1895    vqrshlSCode = '''
1896        int16_t shiftAmt = (int8_t)srcElem2;
1897        FPSCR fpscr = (FPSCR)Fpscr;
1898        if (shiftAmt < 0) {
1899            shiftAmt = -shiftAmt;
1900            Element rBit = 0;
1901            if (shiftAmt <= sizeof(Element) * 8)
1902                rBit = bits(srcElem1, shiftAmt - 1);
1903            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1904                rBit = 1;
1905            if (shiftAmt >= sizeof(Element) * 8) {
1906                shiftAmt = sizeof(Element) * 8 - 1;
1907                destElem = 0;
1908            } else {
1909                destElem = (srcElem1 >> shiftAmt);
1910            }
1911            // Make sure the right shift sign extended when it should.
1912            if (srcElem1 < 0 && destElem >= 0) {
1913                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1914                                             1 - shiftAmt));
1915            }
1916            destElem += rBit;
1917        } else if (shiftAmt > 0) {
1918            bool sat = false;
1919            if (shiftAmt >= sizeof(Element) * 8) {
1920                if (srcElem1 != 0)
1921                    sat = true;
1922                else
1923                    destElem = 0;
1924            } else {
1925                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1926                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1927                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1928                    sat = true;
1929                } else {
1930                    destElem = srcElem1 << shiftAmt;
1931                }
1932            }
1933            if (sat) {
1934                fpscr.qc = 1;
1935                destElem = mask(sizeof(Element) * 8 - 1);
1936                if (srcElem1 < 0)
1937                    destElem = ~destElem;
1938            }
1939        } else {
1940            destElem = srcElem1;
1941        }
1942        Fpscr = fpscr;
1943    '''
    # Saturating rounding register-controlled shift, signed flavour. Negative
    # amounts shift right with sign fill and add rBit, the last bit shifted
    # out, to round. Positive amounts shift left; when the top shiftAmt + 1
    # source bits are not all copies of the sign, or the amount is at least
    # the element width with a non-zero source, the result saturates to
    # mask(width - 1) or its complement for negative sources, and fpscr.qc
    # is set. A zero amount copies the source. Registered as "vqrshl" over
    # signedTypes, classes VqrshlSD (2) and VqrshlSQ (4).
1944    threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1945    threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1946
1947    vabaCode = '''
1948        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1949                                            (srcElem2 - srcElem1);
1950    '''
    # Absolute difference and accumulate: the larger source minus the
    # smaller one is added to the existing destination element. The snippet
    # reads destElem; the trailing True is presumably the read-destination
    # flag -- confirm. Registered as "vaba" over allTypes, classes VabaD (2)
    # and VabaQ (4).
1951    threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1952    threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
1953    vabalCode = '''
1954        destElem += (srcElem1 > srcElem2) ?
1955            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1956            ((BigElement)srcElem2 - (BigElement)srcElem1);
1957    '''
    # Absolute difference and accumulate, with the subtraction done after
    # casting both operands to BigElement. The snippet reads destElem; the
    # trailing True is presumably the read-destination flag -- confirm.
    # Registered as "vabal" (class Vabal) via threeRegLongInst over
    # smallTypes.
1958    threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1959
1960    vabdCode = '''
1961        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1962                                           (srcElem2 - srcElem1);
1963    '''
    # Absolute difference: the larger source minus the smaller one.
    # Registered as "vabd" over allTypes, classes VabdD (2) and VabdQ (4).
1964    threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1965    threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
1966    vabdlCode = '''
1967        destElem = (srcElem1 > srcElem2) ?
1968            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1969            ((BigElement)srcElem2 - (BigElement)srcElem1);
1970    '''
    # Absolute difference with the subtraction done after casting both
    # operands to BigElement. Registered as "vabdl" (class Vabdl) via
    # threeRegLongInst over smallTypes.
1971    threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1972
1973    vtstCode = '''
1974        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1975    '''
    # Test bits: the result element is (Element)(-1) (all ones) when the two
    # sources have at least one set bit in common and 0 otherwise.
    # Registered as "vtst" over unsignedTypes, classes VtstD (2) and
    # VtstQ (4).
1976    threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1977    threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1978
1979    vmulCode = '''
1980        destElem = srcElem1 * srcElem2;
1981    '''
    # Plain elementwise multiplication in the element type. Registered as
    # "vmul" over allTypes, classes NVmulD (2) and NVmulQ (4).
1982    threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1983    threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
1984    vmullCode = '''
1985        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1986    '''
    # Multiplication performed after casting both operands to BigElement.
    # Registered as "vmull" (class Vmull) via threeRegLongInst over
    # smallTypes.
1987    threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1988
1989    vmlaCode = '''
1990        destElem = destElem + srcElem1 * srcElem2;
1991    '''
    # Multiply-accumulate: the product of the sources is added to the
    # existing destination element. The snippet reads destElem; the trailing
    # True is presumably the read-destination flag -- confirm. Registered as
    # "vmla" over allTypes, classes NVmlaD (2) and NVmlaQ (4).
1992    threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
1993    threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
1994    vmlalCode = '''
1995        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
1996    '''
    # Multiply-accumulate with the product formed after casting both
    # operands to BigElement. The snippet reads destElem; the trailing True
    # is presumably the read-destination flag -- confirm. Registered as
    # "vmlal" (class Vmlal) via threeRegLongInst over smallTypes.
1997    threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
1998
1999    vqdmlalCode = '''
2000        FPSCR fpscr = (FPSCR)Fpscr;
2001        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2002        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2003        Element halfNeg = maxNeg / 2;
2004        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2005            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2006            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2007            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2008            fpscr.qc = 1;
2009        }
2010        bool negPreDest = (destElem < 0);
2011        destElem += midElem;
2012        bool negDest = (destElem < 0);
2013        bool negMid = (midElem < 0);
2014        if (negPreDest == negMid && negMid != negDest) {
2015            destElem = mask(sizeof(BigElement) * 8 - 1);
2016            if (negPreDest)
2017                destElem = ~destElem;
2018            fpscr.qc = 1;
2019        }
2020        Fpscr = fpscr;
2021    '''
2022    threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2023
2024    vqdmlslCode = '''
2025        FPSCR fpscr = (FPSCR)Fpscr;
2026        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2027        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2028        Element halfNeg = maxNeg / 2;
2029        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2030            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2031            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2032            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2033            fpscr.qc = 1;
2034        }
2035        bool negPreDest = (destElem < 0);
2036        destElem -= midElem;
2037        bool negDest = (destElem < 0);
2038        bool posMid = (midElem > 0);
2039        if (negPreDest == posMid && posMid != negDest) {
2040            destElem = mask(sizeof(BigElement) * 8 - 1);
2041            if (negPreDest)
2042                destElem = ~destElem;
2043            fpscr.qc = 1;
2044        }
2045        Fpscr = fpscr;
2046    '''
2047    threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2048
2049    vqdmullCode = '''
2050        FPSCR fpscr = (FPSCR)Fpscr;
2051        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2052        if (srcElem1 == srcElem2 &&
2053                srcElem1 == (Element)((Element)1 <<
2054                    (Element)(sizeof(Element) * 8 - 1))) {
2055            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2056            fpscr.qc = 1;
2057        }
2058        Fpscr = fpscr;
2059    '''
    # Saturating doubling multiply: the result is 2 * srcElem1 * srcElem2.
    # When both sources equal the value with only the top Element bit set,
    # the result is replaced by the complement of that value shifted up by
    # one Element width, and fpscr.qc is set. Registered as "vqdmull" (class
    # Vqdmull) via threeRegLongInst over smallTypes.
2060    threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2061
2062    vmlsCode = '''
2063        destElem = destElem - srcElem1 * srcElem2;
2064    '''
    # Multiply-subtract: the product of the sources is subtracted from the
    # existing destination element. The snippet reads destElem; the trailing
    # True is presumably the read-destination flag -- confirm. Registered as
    # "vmls" over allTypes, classes NVmlsD (2) and NVmlsQ (4).
2065    threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2066    threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
2067    vmlslCode = '''
2068        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2069    '''
    # Multiply-subtract with the product formed after casting both operands
    # to BigElement. The snippet reads destElem; the trailing True is
    # presumably the read-destination flag -- confirm. Registered as "vmlsl"
    # (class Vmlsl) via threeRegLongInst over smallTypes.
2070    threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2071
2072    vmulpCode = '''
2073        destElem = 0;
2074        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2075            if (bits(srcElem2, j))
2076                destElem ^= srcElem1 << j;
2077        }
2078    '''
    # Carry-less (XOR-accumulating) multiply: for every set bit j of
    # srcElem2, srcElem1 shifted left by j is XOR-ed into the result.
    # Registered under the same "vmul" mnemonic as the integer multiply but
    # as separate classes NVmulpD (2) and NVmulpQ (4), over unsignedTypes.
2079    threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2080    threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
2081    vmullpCode = '''
2082        destElem = 0;
2083        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2084            if (bits(srcElem2, j))
2085                destElem ^= (BigElement)srcElem1 << j;
2086        }
2087    '''
    # Carry-less (XOR-accumulating) multiply where srcElem1 is cast to
    # BigElement before each shift, so shifted-out bits are kept in the
    # result. Registered under the "vmull" mnemonic as class Vmullp via
    # threeRegLongInst over smallUnsignedTypes.
2088    threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2089
2090    threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)
    # Both "vpmax" classes reuse the maximum snippet (vmaxCode) and pass
    # pairwise=True; how operands are paired is decided inside
    # threeEqualRegInst, which is not visible here.
2091    threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2092
2093    threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
    # Both "vpmin" classes reuse the minimum snippet (vminCode) and pass
    # pairwise=True; how operands are paired is decided inside
    # threeEqualRegInst, which is not visible here.
2094    threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2095
2096    vqdmulhCode = '''
2097        FPSCR fpscr = (FPSCR)Fpscr;
2098        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2099                   (sizeof(Element) * 8);
2100        if (srcElem1 == srcElem2 &&
2101                srcElem1 == (Element)((Element)1 <<
2102                    (sizeof(Element) * 8 - 1))) {
2103            destElem = ~srcElem1;
2104            fpscr.qc = 1;
2105        }
2106        Fpscr = fpscr;
2107    '''
    # Saturating doubling multiply returning the high half: the doubled
    # 64-bit product is shifted right by the Element bit width. When both
    # sources equal the value with only the top Element bit set, the result
    # is replaced by the complement of that value and fpscr.qc is set.
    # Registered as "vqdmulh" over smallSignedTypes, classes VqdmulhD (2) and
    # VqdmulhQ (4).
2108    threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2109    threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2110
2111    vqrdmulhCode = '''
2112        FPSCR fpscr = (FPSCR)Fpscr;
2113        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2114                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2115                   (sizeof(Element) * 8);
2116        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2117        Element halfNeg = maxNeg / 2;
2118        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2119            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2120            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2121            if (destElem < 0) {
2122                destElem = mask(sizeof(Element) * 8 - 1);
2123            } else {
2124                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2125            }
2126            fpscr.qc = 1;
2127        }
2128        Fpscr = fpscr;
2129    '''
2130    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2131            smallSignedTypes, 2, vqrdmulhCode)
2132    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2133            smallSignedTypes, 4, vqrdmulhCode)
2134
2135    vmaxfpCode = '''
2136        FPSCR fpscr = (FPSCR)Fpscr;
2137        bool done;
2138        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2139        if (!done) {
2140            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2141                               true, true, VfpRoundNearest);
2142        } else if (flushToZero(srcReg1, srcReg2)) {
2143            fpscr.idc = 1;
2144        }
2145        Fpscr = fpscr;
2146    '''
    # Floating-point maximum. processNans() is called first and reports
    # through `done` whether it already produced the result. If not, the
    # maximum is computed by binaryOp() with fpMaxS; if it did, fpscr.idc is
    # set when flushToZero() returns true for the sources. Registered as
    # "vmax" for ("float",), classes VmaxDFp (2) and VmaxQFp (4).
2147    threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2148    threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2149
2150    vminfpCode = '''
2151        FPSCR fpscr = (FPSCR)Fpscr;
2152        bool done;
2153        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2154        if (!done) {
2155            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2156                               true, true, VfpRoundNearest);
2157        } else if (flushToZero(srcReg1, srcReg2)) {
2158            fpscr.idc = 1;
2159        }
2160        Fpscr = fpscr;
2161    '''
    # Floating-point minimum. processNans() is called first and reports
    # through `done` whether it already produced the result. If not, the
    # minimum is computed by binaryOp() with fpMinS; if it did, fpscr.idc is
    # set when flushToZero() returns true for the sources. Registered as
    # "vmin" for ("float",), classes VminDFp (2) and VminQFp (4).
2162    threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2163    threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2164
2165    threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),
2166                        2, vmaxfpCode, pairwise=True)
    # Both floating-point "vpmax" classes reuse vmaxfpCode and pass
    # pairwise=True; the pairing itself is handled inside
    # threeEqualRegInstFp, which is not visible here.
2167    threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2168                        4, vmaxfpCode, pairwise=True)
2169
2170    threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2171                        2, vminfpCode, pairwise=True)
    # Both floating-point "vpmin" classes reuse vminfpCode and pass
    # pairwise=True; the pairing itself is handled inside
    # threeEqualRegInstFp, which is not visible here.
2172    threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2173                        4, vminfpCode, pairwise=True)
2174
2175    vaddfpCode = '''
2176        FPSCR fpscr = Fpscr;
2177        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2178                           true, true, VfpRoundNearest);
2179        Fpscr = fpscr;
2180    '''
    # Floating-point add: delegates to binaryOp() with fpAddS and
    # VfpRoundNearest, then writes the possibly updated status copy back.
    # Registered as "vadd" for ("float",), classes VaddDFp (2) and
    # VaddQFp (4).
2181    threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2182    threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2183
2184    threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2185                        2, vaddfpCode, pairwise=True)
    # Both floating-point "vpadd" classes reuse vaddfpCode and pass
    # pairwise=True; the pairing itself is handled inside
    # threeEqualRegInstFp, which is not visible here.
2186    threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2187                        4, vaddfpCode, pairwise=True)
2188
2189    vsubfpCode = '''
2190        FPSCR fpscr = Fpscr;
2191        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2192                           true, true, VfpRoundNearest);
2193        Fpscr = fpscr;
2194    '''
    # Floating-point subtract: delegates to binaryOp() with fpSubS and
    # VfpRoundNearest, then writes the possibly updated status copy back.
    # Registered as "vsub" for ("float",), classes VsubDFp (2) and
    # VsubQFp (4).
2195    threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2196    threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2197
2198    vmulfpCode = '''
2199        FPSCR fpscr = Fpscr;
2200        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2201                           true, true, VfpRoundNearest);
2202        Fpscr = fpscr;
2203    '''
    # Floating-point multiply: delegates to binaryOp() with fpMulS and
    # VfpRoundNearest, then writes the possibly updated status copy back.
    # Registered as "vmul" for ("float",), classes NVmulDFp (2) and
    # NVmulQFp (4).
2204    threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2205    threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2206
2207    vmlafpCode = '''
2208        FPSCR fpscr = Fpscr;
2209        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2210                             true, true, VfpRoundNearest);
2211        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2212                           true, true, VfpRoundNearest);
2213        Fpscr = fpscr;
2214    '''
    # Floating-point multiply-accumulate done as two separate binaryOp()
    # steps: the product is formed first (fpMulS) and then added to the
    # existing destination (fpAddS). The snippet reads destReg; the trailing
    # True is presumably the read-destination flag -- confirm. Registered as
    # "vmla" for ("float",), classes NVmlaDFp (2) and NVmlaQFp (4).
2215    threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2216    threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2217
2218    vmlsfpCode = '''
2219        FPSCR fpscr = Fpscr;
2220        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2221                             true, true, VfpRoundNearest);
2222        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2223                           true, true, VfpRoundNearest);
2224        Fpscr = fpscr;
2225    '''
    # Floating-point multiply-subtract done as two separate binaryOp()
    # steps: the product is formed first (fpMulS) and then subtracted from
    # the existing destination (fpSubS, destination as first operand). The
    # snippet reads destReg; the trailing True is presumably the
    # read-destination flag -- confirm. Registered as "vmls" for ("float",),
    # classes NVmlsDFp (2) and NVmlsQFp (4).
2226    threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2227    threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2228
2229    vcgtfpCode = '''
2230        FPSCR fpscr = (FPSCR)Fpscr;
2231        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2232                             true, true, VfpRoundNearest);
2233        destReg = (res == 0) ? -1 : 0;
2234        if (res == 2.0)
2235            fpscr.ioc = 1;
2236        Fpscr = fpscr;
2237    '''
    # Floating-point compare greater-than. vcgtFunc (defined elsewhere) is
    # run through binaryOp() and returns a code in `res`: a code of 0 yields
    # -1 in the destination, anything else yields 0, and a code of 2.0 also
    # sets fpscr.ioc. NOTE(review): the meaning of each code lives in
    # vcgtFunc -- confirm there. Registered as "vcgt" for ("float",) with
    # toInt = True, classes VcgtDFp (2) and VcgtQFp (4).
2238    threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2239            2, vcgtfpCode, toInt = True)
2240    threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2241            4, vcgtfpCode, toInt = True)
2242
2243    vcgefpCode = '''
2244        FPSCR fpscr = (FPSCR)Fpscr;
2245        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2246                             true, true, VfpRoundNearest);
2247        destReg = (res == 0) ? -1 : 0;
2248        if (res == 2.0)
2249            fpscr.ioc = 1;
2250        Fpscr = fpscr;
2251    '''
    # Floating-point compare greater-than-or-equal. vcgeFunc (defined
    # elsewhere) is run through binaryOp() and returns a code in `res`: a
    # code of 0 yields -1 in the destination, anything else yields 0, and a
    # code of 2.0 also sets fpscr.ioc. NOTE(review): the meaning of each
    # code lives in vcgeFunc -- confirm there. Registered as "vcge" for
    # ("float",) with toInt = True, classes VcgeDFp (2) and VcgeQFp (4).
2252    threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2253            2, vcgefpCode, toInt = True)
2254    threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2255            4, vcgefpCode, toInt = True)
2256
2257    vacgtfpCode = '''
2258        FPSCR fpscr = (FPSCR)Fpscr;
2259        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2260                             true, true, VfpRoundNearest);
2261        destReg = (res == 0) ? -1 : 0;
2262        if (res == 2.0)
2263            fpscr.ioc = 1;
2264        Fpscr = fpscr;
2265    '''
    # Floating-point "vacgt" comparison. vacgtFunc (defined elsewhere) is run
    # through binaryOp() and returns a code in `res`: a code of 0 yields -1
    # in the destination, anything else yields 0, and a code of 2.0 also
    # sets fpscr.ioc. NOTE(review): what vacgtFunc compares (presumably
    # absolute values, going by the mnemonic) is not visible here --
    # confirm. Registered for ("float",) with toInt = True, classes
    # VacgtDFp (2) and VacgtQFp (4).
2266    threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2267            2, vacgtfpCode, toInt = True)
2268    threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2269            4, vacgtfpCode, toInt = True)
2270
2271    vacgefpCode = '''
2272        FPSCR fpscr = (FPSCR)Fpscr;
2273        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2274                             true, true, VfpRoundNearest);
2275        destReg = (res == 0) ? -1 : 0;
2276        if (res == 2.0)
2277            fpscr.ioc = 1;
2278        Fpscr = fpscr;
2279    '''
    # Floating-point "vacge" comparison. vacgeFunc (defined elsewhere) is run
    # through binaryOp() and returns a code in `res`: a code of 0 yields -1
    # in the destination, anything else yields 0, and a code of 2.0 also
    # sets fpscr.ioc. NOTE(review): what vacgeFunc compares (presumably
    # absolute values, going by the mnemonic) is not visible here --
    # confirm. Registered for ("float",) with toInt = True, classes
    # VacgeDFp (2) and VacgeQFp (4).
2280    threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2281            2, vacgefpCode, toInt = True)
2282    threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2283            4, vacgefpCode, toInt = True)
2284
2285    vceqfpCode = '''
2286        FPSCR fpscr = (FPSCR)Fpscr;
2287        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2288                             true, true, VfpRoundNearest);
2289        destReg = (res == 0) ? -1 : 0;
2290        if (res == 2.0)
2291            fpscr.ioc = 1;
2292        Fpscr = fpscr;
2293    '''
    # Floating-point compare equal. vceqFunc (defined elsewhere) is run
    # through binaryOp() and returns a code in `res`: a code of 0 yields -1
    # in the destination, anything else yields 0, and a code of 2.0 also
    # sets fpscr.ioc. NOTE(review): the meaning of each code lives in
    # vceqFunc -- confirm there. Registered as "vceq" for ("float",) with
    # toInt = True, classes VceqDFp (2) and VceqQFp (4).
2294    threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2295            2, vceqfpCode, toInt = True)
2296    threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2297            4, vceqfpCode, toInt = True)
2298
2299    vrecpsCode = '''
2300        FPSCR fpscr = Fpscr;
2301        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2302                           true, true, VfpRoundNearest);
2303        Fpscr = fpscr;
2304    '''
    # "vrecps": delegates entirely to binaryOp() with fpRecpsS (defined
    # elsewhere) and VfpRoundNearest, then writes the status copy back.
    # Registered for ("float",), classes VrecpsDFp (2) and VrecpsQFp (4).
2305    threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2306    threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2307
2308    vrsqrtsCode = '''
2309        FPSCR fpscr = Fpscr;
2310        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2311                           true, true, VfpRoundNearest);
2312        Fpscr = fpscr;
2313    '''
    # "vrsqrts": delegates entirely to binaryOp() with fpRSqrtsS (defined
    # elsewhere) and VfpRoundNearest, then writes the status copy back.
    # Registered for ("float",), classes VrsqrtsDFp (2) and VrsqrtsQFp (4).
2314    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2315    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2316
2317    vabdfpCode = '''
2318        FPSCR fpscr = Fpscr;
2319        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2320                             true, true, VfpRoundNearest);
2321        destReg = fabs(mid);
2322        Fpscr = fpscr;
2323    '''
    # Floating-point absolute difference: the sources are subtracted through
    # binaryOp() with fpSubS and the result is passed through fabs().
    # Registered as "vabd" for ("float",), classes VabdDFp (2) and
    # VabdQFp (4).
2324    threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2325    threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2326
2327    twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)
    # Second family of "vmla" classes built with the two-register helpers
    # (twoEqualRegInst, twoEqualRegInstFp, twoRegLongInst). They reuse the
    # integer, floating-point and BigElement multiply-accumulate snippets
    # (vmlaCode, vmlafpCode, vmlalCode). NOTE(review): the "s" suffix
    # presumably marks the by-scalar encodings -- confirm against the
    # helpers. The integer forms use unsignedTypes, whereas the matching
    # "vmls" forms use allTypes.
2328    twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2329    twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2330    twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2331    twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2332
2333    twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2334    twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2335    twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2336    twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2337    twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2338
2339    twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2340    twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2341    twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2342    twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2343    twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2344
2345    twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2346    twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2347    twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2348    twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2349    twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2350    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2351            smallSignedTypes, 2, vqrdmulhCode)
2352    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2353            smallSignedTypes, 4, vqrdmulhCode)
2354
# VSHR (shift right by immediate).  When imm is at least the element width the
# result is computed by hand instead of shifting: -1 for a negative source, 0
# otherwise (for unsigned element types the "< 0" test is never true, so the
# result is 0).  Smaller shifts use a plain >>.
2355    vshrCode = '''
2356        if (imm >= sizeof(srcElem1) * 8) {
2357            if (srcElem1 < 0)
2358                destElem = -1;
2359            else
2360                destElem = 0;
2361        } else {
2362            destElem = srcElem1 >> imm;
2363        }
2364    '''
2365    twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2366    twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2367
2368    vsraCode = '''
2369        Element mid;;
2370        if (imm >= sizeof(srcElem1) * 8) {
2371            mid = (srcElem1 < 0) ? -1 : 0;
2372        } else {
2373            mid = srcElem1 >> imm;
2374            if (srcElem1 < 0 && mid >= 0) {
2375                mid |= -(mid & ((Element)1 <<
2376                            (sizeof(Element) * 8 - 1 - imm)));
2377            }
2378        }
2379        destElem += mid;
2380    '''
2381    twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2382    twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2383
# VRSHR (rounding shift right by immediate).  rBit is bit (imm - 1) of the
# source, i.e. the last bit shifted out, and is added to the shifted value.
# The shift is split into (imm - 1) followed by 1 so that no single shift
# amount equals the element width when imm == width.  imm larger than the
# width yields 0; imm == 0 copies the source.
2384    vrshrCode = '''
2385        if (imm > sizeof(srcElem1) * 8) {
2386            destElem = 0;
2387        } else if (imm) {
2388            Element rBit = bits(srcElem1, imm - 1);
2389            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2390        } else {
2391            destElem = srcElem1;
2392        }
2393    '''
2394    twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2395    twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2396
# VRSRA (rounding shift right by immediate and accumulate).  Same rounding
# shift as vrshrCode, but every branch adds to destElem instead of assigning;
# the "+= 0" branch leaves the destination unchanged.
2397    vrsraCode = '''
2398        if (imm > sizeof(srcElem1) * 8) {
2399            destElem += 0;
2400        } else if (imm) {
2401            Element rBit = bits(srcElem1, imm - 1);
2402            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2403        } else {
2404            destElem += srcElem1;
2405        }
2406    '''
2407    twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2408    twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2409
# VSRI (shift right and insert).  For imm below the element width the result
# is (srcElem1 >> imm) OR-ed with the destination bits selected by
# ~mask(width - imm); all other destination bits are replaced.  For
# imm >= width the destination is left as it is (self-assignment).
# NOTE(review): assumes mask(n) yields n low-order one bits, so the preserved
# bits are the top imm bits -- confirm against mask()'s definition.
2410    vsriCode = '''
2411        if (imm >= sizeof(Element) * 8)
2412            destElem = destElem;
2413        else
2414            destElem = (srcElem1 >> imm) |
2415                (destElem & ~mask(sizeof(Element) * 8 - imm));
2416    '''
2417    twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2418    twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2419
# VSHL (shift left by immediate).  When imm is at least the element width the
# shift is done as (width - 1) followed by 1, so no single shift amount
# reaches the element width; smaller shifts use a plain <<.
2420    vshlCode = '''
2421        if (imm >= sizeof(Element) * 8)
2422            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2423        else
2424            destElem = srcElem1 << imm;
2425    '''
2426    twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2427    twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2428
# VSLI (shift left and insert).  For imm below the element width the result is
# (srcElem1 << imm) OR-ed with the destination bits selected by mask(imm); for
# imm >= width the destination is left as it is (self-assignment).
# NOTE(review): assumes mask(n) yields n low-order one bits -- confirm.
2429    vsliCode = '''
2430        if (imm >= sizeof(Element) * 8)
2431            destElem = destElem;
2432        else
2433            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2434    '''
2435    twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2436    twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2437
# VQSHL (saturating shift left by immediate), registered for signedTypes.
#  - imm >= width: any non-zero source saturates.  destElem starts as
#    1 << (width - 1) and is bit-inverted when the source is positive; QC is
#    set.  A zero source gives 0.
#  - 0 < imm < width: shift, then inspect source bits [width-1 : width-1-imm]
#    (imm + 1 bits).  Unless they are all zero or equal mask(imm + 1), the
#    result is replaced by the same saturated value and QC is set.
#  - imm == 0: the source is copied.
2438    vqshlCode = '''
2439        FPSCR fpscr = (FPSCR)Fpscr;
2440        if (imm >= sizeof(Element) * 8) {
2441            if (srcElem1 != 0) {
2442                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2443                if (srcElem1 > 0)
2444                    destElem = ~destElem;
2445                fpscr.qc = 1;
2446            } else {
2447                destElem = 0;
2448            }
2449        } else if (imm) {
2450            destElem = (srcElem1 << imm);
2451            uint64_t topBits = bits((uint64_t)srcElem1,
2452                                    sizeof(Element) * 8 - 1,
2453                                    sizeof(Element) * 8 - 1 - imm);
2454            if (topBits != 0 && topBits != mask(imm + 1)) {
2455                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2456                if (srcElem1 > 0)
2457                    destElem = ~destElem;
2458                fpscr.qc = 1;
2459            }
2460        } else {
2461            destElem = srcElem1;
2462        }
2463        Fpscr = fpscr;
2464    '''
2465    twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2466    twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2467
# Saturating shift left by immediate, registered for unsignedTypes under the
# mnemonic "vqshlu".
#  - imm >= width: a non-zero source saturates to mask(width) and sets QC.
#  - 0 < imm < width: shift; if any of the source bits [width-1 : width-imm]
#    (the imm bits shifted out) is set, saturate to mask(width) and set QC.
#  - imm == 0: the source is copied.
2468    vqshluCode = '''
2469        FPSCR fpscr = (FPSCR)Fpscr;
2470        if (imm >= sizeof(Element) * 8) {
2471            if (srcElem1 != 0) {
2472                destElem = mask(sizeof(Element) * 8);
2473                fpscr.qc = 1;
2474            } else {
2475                destElem = 0;
2476            }
2477        } else if (imm) {
2478            destElem = (srcElem1 << imm);
2479            uint64_t topBits = bits((uint64_t)srcElem1,
2480                                    sizeof(Element) * 8 - 1,
2481                                    sizeof(Element) * 8 - imm);
2482            if (topBits != 0) {
2483                destElem = mask(sizeof(Element) * 8);
2484                fpscr.qc = 1;
2485            }
2486        } else {
2487            destElem = srcElem1;
2488        }
2489        Fpscr = fpscr;
2490    '''
2491    twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2492    twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2493
# Saturating shift left by immediate, registered for signedTypes under the
# mnemonic "vqshlus".  In every branch a negative source produces 0 with QC
# set.  A non-negative source that overflows (imm >= width with a positive
# source, or any of the shifted-out top imm bits set) produces mask(width)
# with QC set.  Otherwise the shifted (or, for imm == 0, unmodified) source is
# stored.
2494    vqshlusCode = '''
2495        FPSCR fpscr = (FPSCR)Fpscr;
2496        if (imm >= sizeof(Element) * 8) {
2497            if (srcElem1 < 0) {
2498                destElem = 0;
2499                fpscr.qc = 1;
2500            } else if (srcElem1 > 0) {
2501                destElem = mask(sizeof(Element) * 8);
2502                fpscr.qc = 1;
2503            } else {
2504                destElem = 0;
2505            }
2506        } else if (imm) {
2507            destElem = (srcElem1 << imm);
2508            uint64_t topBits = bits((uint64_t)srcElem1,
2509                                    sizeof(Element) * 8 - 1,
2510                                    sizeof(Element) * 8 - imm);
2511            if (srcElem1 < 0) {
2512                destElem = 0;
2513                fpscr.qc = 1;
2514            } else if (topBits != 0) {
2515                destElem = mask(sizeof(Element) * 8);
2516                fpscr.qc = 1;
2517            }
2518        } else {
2519            if (srcElem1 < 0) {
2520                fpscr.qc = 1;
2521                destElem = 0;
2522            } else {
2523                destElem = srcElem1;
2524            }
2525        }
2526        Fpscr = fpscr;
2527    '''
2528    twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2529    twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2530
# VSHRN (shift right by immediate and narrow).  An imm of at least the source
# element width gives 0; otherwise the shifted source is assigned to the
# destination element.
# NOTE(review): the narrowing itself presumably comes from destElem being the
# narrower type set up by twoRegNarrowShiftInst -- confirm.
2531    vshrnCode = '''
2532        if (imm >= sizeof(srcElem1) * 8) {
2533            destElem = 0;
2534        } else {
2535            destElem = srcElem1 >> imm;
2536        }
2537    '''
2538    twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2539
# VRSHRN (rounding shift right by immediate and narrow).  Adds bit (imm - 1)
# of the source, the last bit shifted out, to the shifted value.  The shift is
# split into (imm - 1) and 1 so imm == width never shifts by the full width in
# one step.
2540    vrshrnCode = '''
2541        if (imm > sizeof(srcElem1) * 8) {
2542            destElem = 0;
2543        } else if (imm) {
2544            Element rBit = bits(srcElem1, imm - 1);
2545            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2546        } else {
2547            destElem = srcElem1;
2548        }
2549    '''
2550    twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2551
# VQSHRN (saturating shift right by immediate and narrow), signed element
# types.
#  - imm > source width: result 0; QC is set unless the source is 0 or -1.
#  - imm != 0: shift in two steps into the wide "mid", then sign-extend it by
#    OR-ing in the negation of the bit at (wide width - 1 - imm).  If mid does
#    not survive a round trip through the narrow Element type, saturate to
#    mask(narrow width - 1), bit-inverted for a negative source, and set QC.
#  - imm == 0: the source is assigned directly.
2552    vqshrnCode = '''
2553        FPSCR fpscr = (FPSCR)Fpscr;
2554        if (imm > sizeof(srcElem1) * 8) {
2555            if (srcElem1 != 0 && srcElem1 != -1)
2556                fpscr.qc = 1;
2557            destElem = 0;
2558        } else if (imm) {
2559            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2560            mid |= -(mid & ((BigElement)1 <<
2561                        (sizeof(BigElement) * 8 - 1 - imm)));
2562            if (mid != (Element)mid) {
2563                destElem = mask(sizeof(Element) * 8 - 1);
2564                if (srcElem1 < 0)
2565                    destElem = ~destElem;
2566                fpscr.qc = 1;
2567            } else {
2568                destElem = mid;
2569            }
2570        } else {
2571            destElem = srcElem1;
2572        }
2573        Fpscr = fpscr;
2574    '''
2575    twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2576
# Saturating shift right by immediate and narrow, registered for
# smallUnsignedTypes under the mnemonic "vqshrun".
#  - imm > source width: result 0; QC is set for any non-zero source.
#  - imm != 0: shift in two steps; if the value does not fit the narrow
#    Element type, saturate to mask(narrow width) and set QC.
#  - imm == 0: the source is assigned directly.
2577    vqshrunCode = '''
2578        FPSCR fpscr = (FPSCR)Fpscr;
2579        if (imm > sizeof(srcElem1) * 8) {
2580            if (srcElem1 != 0)
2581                fpscr.qc = 1;
2582            destElem = 0;
2583        } else if (imm) {
2584            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2585            if (mid != (Element)mid) {
2586                destElem = mask(sizeof(Element) * 8);
2587                fpscr.qc = 1;
2588            } else {
2589                destElem = mid;
2590            }
2591        } else {
2592            destElem = srcElem1;
2593        }
2594        Fpscr = fpscr;
2595    '''
2596    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2597            smallUnsignedTypes, vqshrunCode)
2598
# Variant of the "vqshrun" snippet registered for smallSignedTypes (class
# NVqshruns).  After the two-step shift, any set bit in mid above the narrow
# element width means the value does not fit.  A negative source then
# saturates to 0, a non-negative one to mask(narrow width), and QC is set in
# both cases.
2599    vqshrunsCode = '''
2600        FPSCR fpscr = (FPSCR)Fpscr;
2601        if (imm > sizeof(srcElem1) * 8) {
2602            if (srcElem1 != 0)
2603                fpscr.qc = 1;
2604            destElem = 0;
2605        } else if (imm) {
2606            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2607            if (bits(mid, sizeof(BigElement) * 8 - 1,
2608                          sizeof(Element) * 8) != 0) {
2609                if (srcElem1 < 0) {
2610                    destElem = 0;
2611                } else {
2612                    destElem = mask(sizeof(Element) * 8);
2613                }
2614                fpscr.qc = 1;
2615            } else {
2616                destElem = mid;
2617            }
2618        } else {
2619            destElem = srcElem1;
2620        }
2621        Fpscr = fpscr;
2622    '''
# Same mnemonic as NVqshrun; only the class name and type list differ.
2623    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2624            smallSignedTypes, vqshrunsCode)
2625
# VQRSHRN (saturating rounding shift right by immediate and narrow), signed
# element types.
#  - imm > source width: result 0; QC is set unless the source is 0 or -1.
#  - imm != 0: shift by (imm - 1), capture the low bit as the rounding bit,
#    shift once more, sign-extend mid by hand, then add the rounding bit.  If
#    mid does not fit the narrow Element type, saturate to
#    mask(narrow width - 1), bit-inverted for a negative source, and set QC.
#  - imm == 0: the same fit check and saturation are applied to the source.
2626    vqrshrnCode = '''
2627        FPSCR fpscr = (FPSCR)Fpscr;
2628        if (imm > sizeof(srcElem1) * 8) {
2629            if (srcElem1 != 0 && srcElem1 != -1)
2630                fpscr.qc = 1;
2631            destElem = 0;
2632        } else if (imm) {
2633            BigElement mid = (srcElem1 >> (imm - 1));
2634            uint64_t rBit = mid & 0x1;
2635            mid >>= 1;
2636            mid |= -(mid & ((BigElement)1 <<
2637                        (sizeof(BigElement) * 8 - 1 - imm)));
2638            mid += rBit;
2639            if (mid != (Element)mid) {
2640                destElem = mask(sizeof(Element) * 8 - 1);
2641                if (srcElem1 < 0)
2642                    destElem = ~destElem;
2643                fpscr.qc = 1;
2644            } else {
2645                destElem = mid;
2646            }
2647        } else {
2648            if (srcElem1 != (Element)srcElem1) {
2649                destElem = mask(sizeof(Element) * 8 - 1);
2650                if (srcElem1 < 0)
2651                    destElem = ~destElem;
2652                fpscr.qc = 1;
2653            } else {
2654                destElem = srcElem1;
2655            }
2656        }
2657        Fpscr = fpscr;
2658    '''
2659    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2660            smallSignedTypes, vqrshrnCode)
2661
2662    vqrshrunCode = '''
2663        FPSCR fpscr = (FPSCR)Fpscr;
2664        if (imm > sizeof(srcElem1) * 8) {
2665            if (srcElem1 != 0)
2666                fpscr.qc = 1;
2667            destElem = 0;
2668        } else if (imm) {
2669            BigElement mid = (srcElem1 >> (imm - 1));
2670            uint64_t rBit = mid & 0x1;
2671            mid >>= 1;
2672            mid += rBit;
2673            if (mid != (Element)mid) {
2674                destElem = mask(sizeof(Element) * 8);
2675                fpscr.qc = 1;
2676            } else {
2677                destElem = mid;
2678            }
2679        } else {
2680            if (srcElem1 != (Element)srcElem1) {
2681                destElem = mask(sizeof(Element) * 8 - 1);
2682                if (srcElem1 < 0)
2683                    destElem = ~destElem;
2684                fpscr.qc = 1;
2685            } else {
2686                destElem = srcElem1;
2687            }
2688        }
2689        Fpscr = fpscr;
2690    '''
2691    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2692            smallUnsignedTypes, vqrshrunCode)
2693
# Variant of the "vqrshrun" snippet registered for smallSignedTypes (class
# NVqrshruns).  The rounded, hand-sign-extended value "mid" is checked for any
# set bit above the narrow element width.  On overflow a negative source
# saturates to 0 and a non-negative one to mask(narrow width), with QC set.
# For imm == 0 a negative source gives 0 with QC set; otherwise the source is
# assigned directly.
2694    vqrshrunsCode = '''
2695        FPSCR fpscr = (FPSCR)Fpscr;
2696        if (imm > sizeof(srcElem1) * 8) {
2697            if (srcElem1 != 0)
2698                fpscr.qc = 1;
2699            destElem = 0;
2700        } else if (imm) {
2701            BigElement mid = (srcElem1 >> (imm - 1));
2702            uint64_t rBit = mid & 0x1;
2703            mid >>= 1;
2704            mid |= -(mid & ((BigElement)1 <<
2705                            (sizeof(BigElement) * 8 - 1 - imm)));
2706            mid += rBit;
2707            if (bits(mid, sizeof(BigElement) * 8 - 1,
2708                          sizeof(Element) * 8) != 0) {
2709                if (srcElem1 < 0) {
2710                    destElem = 0;
2711                } else {
2712                    destElem = mask(sizeof(Element) * 8);
2713                }
2714                fpscr.qc = 1;
2715            } else {
2716                destElem = mid;
2717            }
2718        } else {
2719            if (srcElem1 < 0) {
2720                fpscr.qc = 1;
2721                destElem = 0;
2722            } else {
2723                destElem = srcElem1;
2724            }
2725        }
2726        Fpscr = fpscr;
2727    '''
# Same mnemonic as NVqrshrun; only the class name and type list differ.
2728    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2729            smallSignedTypes, vqrshrunsCode)
2730
# VSHLL (shift left long by immediate).  The source is cast to BigElement
# before shifting so the shifted-out bits land in the wider destination; an
# imm of at least the destination width gives 0.
2731    vshllCode = '''
2732        if (imm >= sizeof(destElem) * 8) {
2733            destElem = 0;
2734        } else {
2735            destElem = (BigElement)srcElem1 << imm;
2736        }
2737    '''
2738    twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2739
# VMOVL: plain element copy, registered through the same long-shift helper as
# vshll.
# NOTE(review): the widening presumably comes from destElem being the wider
# type in that helper's template -- confirm.
2740    vmovlCode = '''
2741        destElem = srcElem1;
2742    '''
2743    twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2744
# VCVT float -> fixed point (class names NVcvt2ufx*).  If flushToZero()
# reports true for the source, FPSCR.IDC is set.  The conversion itself,
# vfpFpSToFixed(), runs between prepFpState() and finishVfp().
# NOTE(review): the empty asm statements with memory operands presumably act
# as compiler barriers that keep the conversion between those two calls --
# confirm.
# NOTE(review): the first bool passed to vfpFpSToFixed is false here and true
# in vcvt2sfxCode, so it presumably selects signedness -- confirm.
2745    vcvt2ufxCode = '''
2746        FPSCR fpscr = Fpscr;
2747        if (flushToZero(srcElem1))
2748            fpscr.idc = 1;
2749        VfpSavedState state = prepFpState(VfpRoundNearest);
2750        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2751        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2752        __asm__ __volatile__("" :: "m" (destReg));
2753        finishVfp(fpscr, state, true);
2754        Fpscr = fpscr;
2755    '''
2756    twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2757            2, vcvt2ufxCode, toInt = True)
2758    twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2759            4, vcvt2ufxCode, toInt = True)
2760
# VCVT float -> fixed point (class names NVcvt2sfx*).  Identical to
# vcvt2ufxCode except that the first bool passed to vfpFpSToFixed is true.
2761    vcvt2sfxCode = '''
2762        FPSCR fpscr = Fpscr;
2763        if (flushToZero(srcElem1))
2764            fpscr.idc = 1;
2765        VfpSavedState state = prepFpState(VfpRoundNearest);
2766        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2767        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2768        __asm__ __volatile__("" :: "m" (destReg));
2769        finishVfp(fpscr, state, true);
2770        Fpscr = fpscr;
2771    '''
2772    twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2773            2, vcvt2sfxCode, toInt = True)
2774    twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2775            4, vcvt2sfxCode, toInt = True)
2776
# VCVT fixed point -> float (class names NVcvtu2fp*).  The conversion is
# vfpUFixedToFpS() on the raw source register value with imm, run between
# prepFpState() and finishVfp().
# NOTE(review): the empty asm statements presumably act as compiler barriers
# around the conversion -- confirm.
2777    vcvtu2fpCode = '''
2778        FPSCR fpscr = Fpscr;
2779        VfpSavedState state = prepFpState(VfpRoundNearest);
2780        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2781        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2782        __asm__ __volatile__("" :: "m" (destElem));
2783        finishVfp(fpscr, state, true);
2784        Fpscr = fpscr;
2785    '''
2786    twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2787            2, vcvtu2fpCode, fromInt = True)
2788    twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2789            4, vcvtu2fpCode, fromInt = True)
2790
# VCVT fixed point -> float (class names NVcvts2fp*).  Identical to
# vcvtu2fpCode except that it calls vfpSFixedToFpS().
2791    vcvts2fpCode = '''
2792        FPSCR fpscr = Fpscr;
2793        VfpSavedState state = prepFpState(VfpRoundNearest);
2794        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2795        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2796        __asm__ __volatile__("" :: "m" (destElem));
2797        finishVfp(fpscr, state, true);
2798        Fpscr = fpscr;
2799    '''
2800    twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2801            2, vcvts2fpCode, fromInt = True)
2802    twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2803            4, vcvts2fpCode, fromInt = True)
2804
# VCVT single -> half precision (narrowing).  The source element bits are
# reinterpreted as a float with bitsToFp(); if flushToZero() reports true for
# it, FPSCR.IDC is set.  vcvtFpSFpH() does the conversion and is passed
# fpscr.ahp.
# NOTE(review): fpscr.ahp presumably selects the alternative half-precision
# format -- confirm against vcvtFpSFpH.
2805    vcvts2hCode = '''
2806        FPSCR fpscr = Fpscr;
2807        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2808        if (flushToZero(srcFp1))
2809            fpscr.idc = 1;
2810        VfpSavedState state = prepFpState(VfpRoundNearest);
2811        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2812                                : "m" (srcFp1), "m" (destElem));
2813        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2814                              fpscr.ahp, srcFp1);
2815        __asm__ __volatile__("" :: "m" (destElem));
2816        finishVfp(fpscr, state, true);
2817        Fpscr = fpscr;
2818    '''
2819    twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2820
# VCVT half -> single precision (widening).  vcvtFpHFpS() converts the source
# element and fpToBits() stores the float result as raw bits in destElem.
2821    vcvth2sCode = '''
2822        FPSCR fpscr = Fpscr;
2823        VfpSavedState state = prepFpState(VfpRoundNearest);
2824        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2825                                : "m" (srcElem1), "m" (destElem));
2826        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2827        __asm__ __volatile__("" :: "m" (destElem));
2828        finishVfp(fpscr, state, true);
2829        Fpscr = fpscr;
2830    '''
2831    twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2832
# VRSQRTE, integer form: each uint32_t element is passed through
# unsignedRSqrtEstimate().
2833    vrsqrteCode = '''
2834        destElem = unsignedRSqrtEstimate(srcElem1);
2835    '''
2836    twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2837    twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2838
# VRSQRTE, floating-point form: FPSCR.IDC is set when flushToZero() reports
# true for the source, then fprSqrtEstimate() produces the result.
2839    vrsqrtefpCode = '''
2840        FPSCR fpscr = Fpscr;
2841        if (flushToZero(srcReg1))
2842            fpscr.idc = 1;
2843        destReg = fprSqrtEstimate(fpscr, srcReg1);
2844        Fpscr = fpscr;
2845    '''
2846    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2847    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2848
# VRECPE, integer form: each uint32_t element is passed through
# unsignedRecipEstimate().
2849    vrecpeCode = '''
2850        destElem = unsignedRecipEstimate(srcElem1);
2851    '''
2852    twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2853    twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2854
# VRECPE, floating-point form: FPSCR.IDC is set when flushToZero() reports
# true for the source, then fpRecipEstimate() produces the result.
2855    vrecpefpCode = '''
2856        FPSCR fpscr = Fpscr;
2857        if (flushToZero(srcReg1))
2858            fpscr.idc = 1;
2859        destReg = fpRecipEstimate(fpscr, srcReg1);
2860        Fpscr = fpscr;
2861    '''
2862    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2863    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2864
# VREV16 / VREV32 / VREV64: reverse the order of elements inside groups of
# 2, 4 or 8 bytes ((1 << 1), (1 << 2), (1 << 3)).  groupSize is the number of
# elements per group, and XOR-ing the index with (groupSize - 1) mirrors it
# within its group.
# NOTE(review): i and j are not declared in these snippets; presumably the
# surrounding template loops over source index i and stores destElem at index
# j -- confirm against twoRegMiscInst.
2865    vrev16Code = '''
2866        destElem = srcElem1;
2867        unsigned groupSize = ((1 << 1) / sizeof(Element));
2868        unsigned reverseMask = (groupSize - 1);
2869        j = i ^ reverseMask;
2870    '''
2871    twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2872    twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
2873    vrev32Code = '''
2874        destElem = srcElem1;
2875        unsigned groupSize = ((1 << 2) / sizeof(Element));
2876        unsigned reverseMask = (groupSize - 1);
2877        j = i ^ reverseMask;
2878    '''
2879    twoRegMiscInst("vrev32", "NVrev32D",
2880            ("uint8_t", "uint16_t"), 2, vrev32Code)
2881    twoRegMiscInst("vrev32", "NVrev32Q",
2882            ("uint8_t", "uint16_t"), 4, vrev32Code)
2883    vrev64Code = '''
2884        destElem = srcElem1;
2885        unsigned groupSize = ((1 << 3) / sizeof(Element));
2886        unsigned reverseMask = (groupSize - 1);
2887        j = i ^ reverseMask;
2888    '''
2889    twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2890    twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2891
# VPADDL (pairwise add long): both source elements are widened to BigElement
# before the addition so the sum is computed at the wider width.
2892    vpaddlCode = '''
2893        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2894    '''
2895    twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2896    twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2897
# VPADAL (pairwise add and accumulate long): same widened sum, added to the
# existing destination element.
2898    vpadalCode = '''
2899        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2900    '''
2901    twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2902    twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2903
# VCLS (count leading sign bits), signed element types.  The source is shifted
# left once to drop the sign bit itself.  The loop then counts how many
# following bits match it: for a negative source, while the value stays
# negative; otherwise while it stays non-negative.  The count is capped at
# (element width - 1).
2904    vclsCode = '''
2905        unsigned count = 0;
2906        if (srcElem1 < 0) {
2907            srcElem1 <<= 1;
2908            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2909                count++;
2910                srcElem1 <<= 1;
2911            }
2912        } else {
2913            srcElem1 <<= 1;
2914            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2915                count++;
2916                srcElem1 <<= 1;
2917            }
2918        }
2919        destElem = count;
2920    '''
2921    twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2922    twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2923
# VCLZ (count leading zeros).  Registered with signed element types so that
# "srcElem1 >= 0" tests whether the top bit is clear; the value is shifted
# left until the top bit is set or the element width has been counted.
2924    vclzCode = '''
2925        unsigned count = 0;
2926        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2927            count++;
2928            srcElem1 <<= 1;
2929        }
2930        destElem = count;
2931    '''
2932    twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2933    twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2934
# VCNT (population count): adds up the low bit of the source while shifting it
# right, stopping once the value is zero or the element width has been
# reached.
2935    vcntCode = '''
2936        unsigned count = 0;
2937        while (srcElem1 && count < sizeof(Element) * 8) {
2938            count += srcElem1 & 0x1;
2939            srcElem1 >>= 1;
2940        }
2941        destElem = count;
2942    '''
2943    twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2944    twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2945
# VMVN (bitwise NOT).  Registered only for uint64_t, since a bitwise
# complement does not depend on how the register is split into elements.
2946    vmvnCode = '''
2947        destElem = ~srcElem1;
2948    '''
2949    twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2950    twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2951
# VQABS (saturating absolute value).  The one value whose negation does not
# fit, 1 << (width - 1), is replaced by its bitwise complement and sets QC.
# Other negative values are negated; non-negative values are copied.
2952    vqabsCode = '''
2953        FPSCR fpscr = (FPSCR)Fpscr;
2954        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2955            fpscr.qc = 1;
2956            destElem = ~srcElem1;
2957        } else if (srcElem1 < 0) {
2958            destElem = -srcElem1;
2959        } else {
2960            destElem = srcElem1;
2961        }
2962        Fpscr = fpscr;
2963    '''
2964    twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2965    twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2966
# VQNEG (saturating negate).  The value 1 << (width - 1) is replaced by its
# bitwise complement and sets QC; every other value is simply negated.
2967    vqnegCode = '''
2968        FPSCR fpscr = (FPSCR)Fpscr;
2969        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2970            fpscr.qc = 1;
2971            destElem = ~srcElem1;
2972        } else {
2973            destElem = -srcElem1;
2974        }
2975        Fpscr = fpscr;
2976    '''
2977    twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2978    twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2979
# VABS, integer form: negate negative elements and copy the rest.  There is no
# saturation handling here, unlike vqabsCode.
2980    vabsCode = '''
2981        if (srcElem1 < 0) {
2982            destElem = -srcElem1;
2983        } else {
2984            destElem = srcElem1;
2985        }
2986    '''
2987    twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2988    twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
# VABS, floating-point form: the value is viewed as a uint32_t through a union
# and AND-ed with mask(width - 1), which clears the top (sign) bit without
# doing any floating-point arithmetic.
2989    vabsfpCode = '''
2990        union
2991        {
2992            uint32_t i;
2993            float f;
2994        } cStruct;
2995        cStruct.f = srcReg1;
2996        cStruct.i &= mask(sizeof(Element) * 8 - 1);
2997        destReg = cStruct.f;
2998    '''
2999    twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
3000    twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
3001
# VNEG, integer form: plain unary minus on each signed element.
3002    vnegCode = '''
3003        destElem = -srcElem1;
3004    '''
3005    twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
3006    twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
# VNEG, floating-point form: plain unary minus on the float register value.
3007    vnegfpCode = '''
3008        destReg = -srcReg1;
3009    '''
3010    twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
3011    twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
3012
# VCGT against zero, integer form: mask(element width) when the element is
# greater than 0, else 0.
3013    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3014    twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3015    twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
# VCGT against zero, floating-point form: binaryOp() with vcgtFunc compares
# the source with 0.0.  A result of 0 gives -1 (all bits set), anything else
# gives 0, and a result of exactly 2.0 also sets FPSCR.IOC.
# NOTE(review): 2.0 is presumably the comparison helper's "invalid operand"
# marker -- confirm against vcgtFunc.
3016    vcgtfpCode = '''
3017        FPSCR fpscr = (FPSCR)Fpscr;
3018        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3019                             true, true, VfpRoundNearest);
3020        destReg = (res == 0) ? -1 : 0;
3021        if (res == 2.0)
3022            fpscr.ioc = 1;
3023        Fpscr = fpscr;
3024    '''
3025    twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3026            2, vcgtfpCode, toInt = True)
3027    twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3028            4, vcgtfpCode, toInt = True)
3029
# VCGE against zero, integer form: mask(element width) when the element is
# greater than or equal to 0, else 0.
3030    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3031    twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3032    twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
# VCGE against zero, floating-point form: binaryOp() with vcgeFunc compares
# the source with 0.0.  A result of 0 gives -1 (all bits set), anything else
# gives 0, and a result of exactly 2.0 also sets FPSCR.IOC.
3033    vcgefpCode = '''
3034        FPSCR fpscr = (FPSCR)Fpscr;
3035        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3036                             true, true, VfpRoundNearest);
3037        destReg = (res == 0) ? -1 : 0;
3038        if (res == 2.0)
3039            fpscr.ioc = 1;
3040        Fpscr = fpscr;
3041    '''
3042    twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3043            2, vcgefpCode, toInt = True)
3044    twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3045            4, vcgefpCode, toInt = True)
3046
# VCEQ against zero, integer form: mask(element width) when the element equals
# 0, else 0.
3047    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3048    twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3049    twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
# VCEQ against zero, floating-point form.  This assignment rebinds the name
# vceqfpCode, which the three-register vceq registrations earlier in the file
# also used; those earlier calls have already consumed the old string.
3050    vceqfpCode = '''
3051        FPSCR fpscr = (FPSCR)Fpscr;
3052        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3053                             true, true, VfpRoundNearest);
3054        destReg = (res == 0) ? -1 : 0;
3055        if (res == 2.0)
3056            fpscr.ioc = 1;
3057        Fpscr = fpscr;
3058    '''
3059    twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3060            2, vceqfpCode, toInt = True)
3061    twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3062            4, vceqfpCode, toInt = True)
3063
# VCLE against zero, integer form: mask(element width) when the element is
# less than or equal to 0, else 0.
3064    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3065    twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3066    twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
# VCLE against zero, floating-point form: binaryOp() with vcleFunc compares
# the source with 0.0.  A result of 0 gives -1 (all bits set), anything else
# gives 0, and a result of exactly 2.0 also sets FPSCR.IOC.
3067    vclefpCode = '''
3068        FPSCR fpscr = (FPSCR)Fpscr;
3069        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3070                             true, true, VfpRoundNearest);
3071        destReg = (res == 0) ? -1 : 0;
3072        if (res == 2.0)
3073            fpscr.ioc = 1;
3074        Fpscr = fpscr;
3075    '''
3076    twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3077            2, vclefpCode, toInt = True)
3078    twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3079            4, vclefpCode, toInt = True)
3080
# VCLT against zero, integer form: mask(element width) when the element is
# less than 0, else 0.
3081    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3082    twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3083    twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
# VCLT against zero, floating-point form: binaryOp() with vcltFunc compares
# the source with 0.0.  A result of 0 gives -1 (all bits set), anything else
# gives 0, and a result of exactly 2.0 also sets FPSCR.IOC.
3084    vcltfpCode = '''
3085        FPSCR fpscr = (FPSCR)Fpscr;
3086        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3087                             true, true, VfpRoundNearest);
3088        destReg = (res == 0) ? -1 : 0;
3089        if (res == 2.0)
3090            fpscr.ioc = 1;
3091        Fpscr = fpscr;
3092    '''
3093    twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3094            2, vcltfpCode, toInt = True)
3095    twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3096            4, vcltfpCode, toInt = True)
3097
# VSWP: exchange the two operands register by register through a temporary.
# Both srcReg1 and destReg are written.
# NOTE(review): twoRegMiscScramble presumably writes both back afterwards --
# confirm against the helper.
3098    vswpCode = '''
3099        FloatRegBits mid;
3100        for (unsigned r = 0; r < rCount; r++) {
3101            mid = srcReg1.regs[r];
3102            srcReg1.regs[r] = destReg.regs[r];
3103            destReg.regs[r] = mid;
3104        }
3105    '''
3106    twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3107    twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3108
# VTRN (transpose): for every even index i, swap srcReg1's element i with
# destReg's element i + 1.  The remaining elements stay where they are.
3109    vtrnCode = '''
3110        Element mid;
3111        for (unsigned i = 0; i < eCount; i += 2) {
3112            mid = srcReg1.elements[i];
3113            srcReg1.elements[i] = destReg.elements[i + 1];
3114            destReg.elements[i + 1] = mid;
3115        }
3116    '''
3117    twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3118    twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3119
# VUZP (unzip / de-interleave).  srcReg1 is first copied into "mid" because it
# is overwritten in place.  Afterwards:
#   destReg = even elements of destReg, then even elements of the old srcReg1
#   srcReg1 = odd elements of destReg, then odd elements of the old srcReg1
# The first loop reads destReg elements at 2*i and 2*i + 1 and writes only
# destReg element i, so destReg needs no copy.  The upper half of destReg is
# filled by the second loop.
3120    vuzpCode = '''
3121        Element mid[eCount];
3122        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3123        for (unsigned i = 0; i < eCount / 2; i++) {
3124            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3125            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3126            destReg.elements[i] = destReg.elements[2 * i];
3127        }
3128        for (unsigned i = 0; i < eCount / 2; i++) {
3129            destReg.elements[eCount / 2 + i] = mid[2 * i];
3130        }
3131    '''
3132    twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3133    twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3134
3135    vzipCode = '''
3136        Element mid[eCount];
3137        memcpy(&mid, &destReg, sizeof(destReg));
3138        for (unsigned i = 0; i < eCount / 2; i++) {
3139            destReg.elements[2 * i] = mid[i];
3140            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3141        }
3142        for (int i = 0; i < eCount / 2; i++) {
3143            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3144            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3145        }
3146    '''
3147    twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3148    twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3149
    # vmovn: each destination element is assigned the matching source
    # element.  Any narrowing comes from the element types chosen by
    # twoRegNarrowMiscInst (defined outside this excerpt) -- presumably a
    # wider source type than destination type; confirm there.
    vmovnCode = 'destElem = srcElem1;'
    twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3152
3153    vdupCode = 'destElem = srcElem1;'
3154    twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3155    twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3156
3157    def vdupGprInst(name, Name, types, rCount):
3158        global header_output, exec_output
3159        eWalkCode = '''
3160        RegVect destReg;
3161        for (unsigned i = 0; i < eCount; i++) {
3162            destReg.elements[i] = htog((Element)Op1);
3163        }
3164        '''
3165        for reg in range(rCount):
3166            eWalkCode += '''
3167            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3168            ''' % { "reg" : reg }
3169        iop = InstObjParams(name, Name,
3170                            "RegRegOp",
3171                            { "code": eWalkCode,
3172                              "r_count": rCount,
3173                              "predicate_test": predicateTest }, [])
3174        header_output += NeonRegRegOpDeclare.subst(iop)
3175        exec_output += NeonEqualRegExecute.subst(iop)
3176        for type in types:
3177            substDict = { "targs" : type,
3178                          "class_name" : Name }
3179            exec_output += NeonExecDeclare.subst(substDict)
3180    vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3181    vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3182
3183    vmovCode = 'destElem = imm;'
3184    oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3185    oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3186
3187    vorrCode = 'destElem |= imm;'
3188    oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3189    oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3190
3191    vmvnCode = 'destElem = ~imm;'
3192    oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3193    oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3194
3195    vbicCode = 'destElem &= ~imm;'
3196    oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3197    oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3198
    # vqmovn: narrowing move that records a changed value in FPSCR.qc.
    # srcElem1 is assigned to destElem.  If casting the result back to
    # BigElement does not reproduce srcElem1, fpscr.qc is set and destElem
    # is replaced by mask(sizeof(Element) * 8 - 1), bit-inverted when
    # srcElem1 is negative.  mask() is defined elsewhere; presumably it
    # yields that many low-order one bits, making these the largest and
    # smallest signed Element values -- confirm.
    # The FPSCR is copied to a local, modified, and written back.
    vqmovnCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3211
    # vqmovun, instantiated for smallUnsignedTypes: narrowing move that
    # records a changed value in FPSCR.qc.  If casting the narrowed result
    # back to BigElement does not reproduce srcElem1, fpscr.qc is set and
    # destElem is replaced by mask(sizeof(Element) * 8) -- presumably the
    # all-ones Element value; mask() is defined elsewhere, confirm.
    vqmovunCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
            smallUnsignedTypes, vqmovunCode)
3223
    # vqmovun, instantiated for smallSignedTypes (class NVqmovuns).
    # fpscr.qc is set when srcElem1 is negative, or when the low
    # sizeof(Element) * 8 bits of the narrowed result differ from srcElem1.
    # In that case destElem is replaced by mask(sizeof(Element) * 8),
    # bit-inverted when srcElem1 is negative.  Presumably this gives an
    # all-ones pattern for values that are too large and zero for negative
    # ones; mask() is defined elsewhere, confirm.
    vqmovunsCode = '''
    FPSCR fpscr = (FPSCR)Fpscr;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    Fpscr = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
            smallSignedTypes, vqmovunsCode)
3238
3239    def buildVext(name, Name, types, rCount, op):
3240        global header_output, exec_output
3241        eWalkCode = '''
3242        RegVect srcReg1, srcReg2, destReg;
3243        '''
3244        for reg in range(rCount):
3245            eWalkCode += '''
3246                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3247                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3248            ''' % { "reg" : reg }
3249        eWalkCode += op
3250        for reg in range(rCount):
3251            eWalkCode += '''
3252            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3253            ''' % { "reg" : reg }
3254        iop = InstObjParams(name, Name,
3255                            "RegRegRegImmOp",
3256                            { "code": eWalkCode,
3257                              "r_count": rCount,
3258                              "predicate_test": predicateTest }, [])
3259        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3260        exec_output += NeonEqualRegExecute.subst(iop)
3261        for type in types:
3262            substDict = { "targs" : type,
3263                          "class_name" : Name }
3264            exec_output += NeonExecDeclare.subst(substDict)
3265
3266    vextCode = '''
3267        for (unsigned i = 0; i < eCount; i++) {
3268            unsigned index = i + imm;
3269            if (index < eCount) {
3270                destReg.elements[i] = srcReg1.elements[index];
3271            } else {
3272                index -= eCount;
3273                assert(index < eCount);
3274                destReg.elements[i] = srcReg2.elements[index];
3275            }
3276        }
3277    '''
3278    buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3279    buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3280
3281    def buildVtbxl(name, Name, length, isVtbl):
3282        global header_output, decoder_output, exec_output
3283        code = '''
3284            union
3285            {
3286                uint8_t bytes[32];
3287                FloatRegBits regs[8];
3288            } table;
3289
3290            union
3291            {
3292                uint8_t bytes[8];
3293                FloatRegBits regs[2];
3294            } destReg, srcReg2;
3295
3296            const unsigned length = %(length)d;
3297            const bool isVtbl = %(isVtbl)s;
3298
3299            srcReg2.regs[0] = htog(FpOp2P0.uw);
3300            srcReg2.regs[1] = htog(FpOp2P1.uw);
3301
3302            destReg.regs[0] = htog(FpDestP0.uw);
3303            destReg.regs[1] = htog(FpDestP1.uw);
3304        ''' % { "length" : length, "isVtbl" : isVtbl }
3305        for reg in range(8):
3306            if reg < length * 2:
3307                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3308                        { "reg" : reg }
3309            else:
3310                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3311        code += '''
3312        for (unsigned i = 0; i < sizeof(destReg); i++) {
3313            uint8_t index = srcReg2.bytes[i];
3314            if (index < 8 * length) {
3315                destReg.bytes[i] = table.bytes[index];
3316            } else {
3317                if (isVtbl)
3318                    destReg.bytes[i] = 0;
3319                // else destReg.bytes[i] unchanged
3320            }
3321        }
3322
3323        FpDestP0.uw = gtoh(destReg.regs[0]);
3324        FpDestP1.uw = gtoh(destReg.regs[1]);
3325        '''
3326        iop = InstObjParams(name, Name,
3327                            "RegRegRegOp",
3328                            { "code": code,
3329                              "predicate_test": predicateTest }, [])
3330        header_output += RegRegRegOpDeclare.subst(iop)
3331        decoder_output += RegRegRegOpConstructor.subst(iop)
3332        exec_output += PredOpExecute.subst(iop)
3333
3334    buildVtbxl("vtbl", "NVtbl1", 1, "true")
3335    buildVtbxl("vtbl", "NVtbl2", 2, "true")
3336    buildVtbxl("vtbl", "NVtbl3", 3, "true")
3337    buildVtbxl("vtbl", "NVtbl4", 4, "true")
3338
3339    buildVtbxl("vtbx", "NVtbx1", 1, "false")
3340    buildVtbxl("vtbx", "NVtbx2", 2, "false")
3341    buildVtbxl("vtbx", "NVtbx3", 3, "false")
3342    buildVtbxl("vtbx", "NVtbx4", 4, "false")
3343}};
3344