neon.isa revision 7644:62873d5c2bfc
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest }, [])
687        header_output += NeonRegRegRegOpDeclare.subst(iop)
688        exec_output += NeonEqualRegExecute.subst(iop)
689        for type in types:
690            substDict = { "targs" : type,
691                          "class_name" : Name }
692            exec_output += NeonExecDeclare.subst(substDict)
693
694    def threeEqualRegInstFp(name, Name, types, rCount, op,
695                            readDest=False, pairwise=False, toInt=False):
696        global header_output, exec_output
697        eWalkCode = simdEnabledCheckCode + '''
698        typedef FloatReg FloatVect[rCount];
699        FloatVect srcRegs1, srcRegs2;
700        '''
701        if toInt:
702            eWalkCode += 'RegVect destRegs;\n'
703        else:
704            eWalkCode += 'FloatVect destRegs;\n'
705        for reg in range(rCount):
706            eWalkCode += '''
707                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
708                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
709            ''' % { "reg" : reg }
710            if readDest:
711                if toInt:
712                    eWalkCode += '''
713                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
714                    ''' % { "reg" : reg }
715                else:
716                    eWalkCode += '''
717                        destRegs[%(reg)d] = FpDestP%(reg)d;
718                    ''' % { "reg" : reg }
719        readDestCode = ''
720        if readDest:
721            readDestCode = 'destReg = destRegs[r];'
722        destType = 'FloatReg'
723        writeDest = 'destRegs[r] = destReg;'
724        if toInt:
725            destType = 'FloatRegBits'
726            writeDest = 'destRegs.regs[r] = destReg;'
727        if pairwise:
728            eWalkCode += '''
729            for (unsigned r = 0; r < rCount; r++) {
730                FloatReg srcReg1 = (2 * r < rCount) ?
731                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
732                FloatReg srcReg2 = (2 * r < rCount) ?
733                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
734                %(destType)s destReg;
735                %(readDest)s
736                %(op)s
737                %(writeDest)s
738            }
739            ''' % { "op" : op,
740                    "readDest" : readDestCode,
741                    "destType" : destType,
742                    "writeDest" : writeDest }
743        else:
744            eWalkCode += '''
745            for (unsigned r = 0; r < rCount; r++) {
746                FloatReg srcReg1 = srcRegs1[r];
747                FloatReg srcReg2 = srcRegs2[r];
748                %(destType)s destReg;
749                %(readDest)s
750                %(op)s
751                %(writeDest)s
752            }
753            ''' % { "op" : op,
754                    "readDest" : readDestCode,
755                    "destType" : destType,
756                    "writeDest" : writeDest }
757        for reg in range(rCount):
758            if toInt:
759                eWalkCode += '''
760                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
761                ''' % { "reg" : reg }
762            else:
763                eWalkCode += '''
764                FpDestP%(reg)d = destRegs[%(reg)d];
765                ''' % { "reg" : reg }
766        iop = InstObjParams(name, Name,
767                            "FpRegRegRegOp",
768                            { "code": eWalkCode,
769                              "r_count": rCount,
770                              "predicate_test": predicateTest }, [])
771        header_output += NeonRegRegRegOpDeclare.subst(iop)
772        exec_output += NeonEqualRegExecute.subst(iop)
773        for type in types:
774            substDict = { "targs" : type,
775                          "class_name" : Name }
776            exec_output += NeonExecDeclare.subst(substDict)
777
778    def threeUnequalRegInst(name, Name, types, op,
779                            bigSrc1, bigSrc2, bigDest, readDest):
780        global header_output, exec_output
781        src1Cnt = src2Cnt = destCnt = 2
782        src1Prefix = src2Prefix = destPrefix = ''
783        if bigSrc1:
784            src1Cnt = 4
785            src1Prefix = 'Big'
786        if bigSrc2:
787            src2Cnt = 4
788            src2Prefix = 'Big'
789        if bigDest:
790            destCnt = 4
791            destPrefix = 'Big'
792        eWalkCode = simdEnabledCheckCode + '''
793            %sRegVect srcReg1;
794            %sRegVect srcReg2;
795            %sRegVect destReg;
796        ''' % (src1Prefix, src2Prefix, destPrefix)
797        for reg in range(src1Cnt):
798            eWalkCode += '''
799                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
800            ''' % { "reg" : reg }
801        for reg in range(src2Cnt):
802            eWalkCode += '''
803                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
804            ''' % { "reg" : reg }
805        if readDest:
806            for reg in range(destCnt):
807                eWalkCode += '''
808                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
809                ''' % { "reg" : reg }
810        readDestCode = ''
811        if readDest:
812            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
813        eWalkCode += '''
814        for (unsigned i = 0; i < eCount; i++) {
815            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
816            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
817            %(destPrefix)sElement destElem;
818            %(readDest)s
819            %(op)s
820            destReg.elements[i] = htog(destElem);
821        }
822        ''' % { "op" : op, "readDest" : readDestCode,
823                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
824                "destPrefix" : destPrefix }
825        for reg in range(destCnt):
826            eWalkCode += '''
827            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
828            ''' % { "reg" : reg }
829        iop = InstObjParams(name, Name,
830                            "RegRegRegOp",
831                            { "code": eWalkCode,
832                              "r_count": 2,
833                              "predicate_test": predicateTest }, [])
834        header_output += NeonRegRegRegOpDeclare.subst(iop)
835        exec_output += NeonUnequalRegExecute.subst(iop)
836        for type in types:
837            substDict = { "targs" : type,
838                          "class_name" : Name }
839            exec_output += NeonExecDeclare.subst(substDict)
840
841    def threeRegNarrowInst(name, Name, types, op, readDest=False):
842        threeUnequalRegInst(name, Name, types, op,
843                            True, True, False, readDest)
844
845    def threeRegLongInst(name, Name, types, op, readDest=False):
846        threeUnequalRegInst(name, Name, types, op,
847                            False, False, True, readDest)
848
849    def threeRegWideInst(name, Name, types, op, readDest=False):
850        threeUnequalRegInst(name, Name, types, op,
851                            True, False, True, readDest)
852
853    def twoEqualRegInst(name, Name, types, rCount, op, readDest=False):
854        global header_output, exec_output
855        eWalkCode = simdEnabledCheckCode + '''
856        RegVect srcReg1, srcReg2, destReg;
857        '''
858        for reg in range(rCount):
859            eWalkCode += '''
860                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
861                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
862            ''' % { "reg" : reg }
863            if readDest:
864                eWalkCode += '''
865                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
866                ''' % { "reg" : reg }
867        readDestCode = ''
868        if readDest:
869            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
870        eWalkCode += '''
871        assert(imm >= 0 && imm < eCount);
872        for (unsigned i = 0; i < eCount; i++) {
873            Element srcElem1 = gtoh(srcReg1.elements[i]);
874            Element srcElem2 = gtoh(srcReg2.elements[imm]);
875            Element destElem;
876            %(readDest)s
877            %(op)s
878            destReg.elements[i] = htog(destElem);
879        }
880        ''' % { "op" : op, "readDest" : readDestCode }
881        for reg in range(rCount):
882            eWalkCode += '''
883            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
884            ''' % { "reg" : reg }
885        iop = InstObjParams(name, Name,
886                            "RegRegRegImmOp",
887                            { "code": eWalkCode,
888                              "r_count": rCount,
889                              "predicate_test": predicateTest }, [])
890        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
891        exec_output += NeonEqualRegExecute.subst(iop)
892        for type in types:
893            substDict = { "targs" : type,
894                          "class_name" : Name }
895            exec_output += NeonExecDeclare.subst(substDict)
896
897    def twoRegLongInst(name, Name, types, op, readDest=False):
898        global header_output, exec_output
899        rCount = 2
900        eWalkCode = simdEnabledCheckCode + '''
901        RegVect srcReg1, srcReg2;
902        BigRegVect destReg;
903        '''
904        for reg in range(rCount):
905            eWalkCode += '''
906                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
907                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
908            ''' % { "reg" : reg }
909        if readDest:
910            for reg in range(2 * rCount):
911                eWalkCode += '''
912                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
913                ''' % { "reg" : reg }
914        readDestCode = ''
915        if readDest:
916            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
917        eWalkCode += '''
918        assert(imm >= 0 && imm < eCount);
919        for (unsigned i = 0; i < eCount; i++) {
920            Element srcElem1 = gtoh(srcReg1.elements[i]);
921            Element srcElem2 = gtoh(srcReg2.elements[imm]);
922            BigElement destElem;
923            %(readDest)s
924            %(op)s
925            destReg.elements[i] = htog(destElem);
926        }
927        ''' % { "op" : op, "readDest" : readDestCode }
928        for reg in range(2 * rCount):
929            eWalkCode += '''
930            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
931            ''' % { "reg" : reg }
932        iop = InstObjParams(name, Name,
933                            "RegRegRegImmOp",
934                            { "code": eWalkCode,
935                              "r_count": rCount,
936                              "predicate_test": predicateTest }, [])
937        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
938        exec_output += NeonUnequalRegExecute.subst(iop)
939        for type in types:
940            substDict = { "targs" : type,
941                          "class_name" : Name }
942            exec_output += NeonExecDeclare.subst(substDict)
943
944    def twoEqualRegInstFp(name, Name, types, rCount, op, readDest=False):
945        global header_output, exec_output
946        eWalkCode = simdEnabledCheckCode + '''
947        typedef FloatReg FloatVect[rCount];
948        FloatVect srcRegs1, srcRegs2, destRegs;
949        '''
950        for reg in range(rCount):
951            eWalkCode += '''
952                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
953                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
954            ''' % { "reg" : reg }
955            if readDest:
956                eWalkCode += '''
957                    destRegs[%(reg)d] = FpDestP%(reg)d;
958                ''' % { "reg" : reg }
959        readDestCode = ''
960        if readDest:
961            readDestCode = 'destReg = destRegs[i];'
962        eWalkCode += '''
963        assert(imm >= 0 && imm < rCount);
964        for (unsigned i = 0; i < rCount; i++) {
965            FloatReg srcReg1 = srcRegs1[i];
966            FloatReg srcReg2 = srcRegs2[imm];
967            FloatReg destReg;
968            %(readDest)s
969            %(op)s
970            destRegs[i] = destReg;
971        }
972        ''' % { "op" : op, "readDest" : readDestCode }
973        for reg in range(rCount):
974            eWalkCode += '''
975            FpDestP%(reg)d = destRegs[%(reg)d];
976            ''' % { "reg" : reg }
977        iop = InstObjParams(name, Name,
978                            "FpRegRegRegImmOp",
979                            { "code": eWalkCode,
980                              "r_count": rCount,
981                              "predicate_test": predicateTest }, [])
982        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
983        exec_output += NeonEqualRegExecute.subst(iop)
984        for type in types:
985            substDict = { "targs" : type,
986                          "class_name" : Name }
987            exec_output += NeonExecDeclare.subst(substDict)
988
989    def twoRegShiftInst(name, Name, types, rCount, op,
990            readDest=False, toInt=False, fromInt=False):
991        global header_output, exec_output
992        eWalkCode = simdEnabledCheckCode + '''
993        RegVect srcRegs1, destRegs;
994        '''
995        for reg in range(rCount):
996            eWalkCode += '''
997                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
998            ''' % { "reg" : reg }
999            if readDest:
1000                eWalkCode += '''
1001                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1002                ''' % { "reg" : reg }
1003        readDestCode = ''
1004        if readDest:
1005            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1006            if toInt:
1007                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1008        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1009        if fromInt:
1010            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1011        declDest = 'Element destElem;'
1012        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1013        if toInt:
1014            declDest = 'FloatRegBits destReg;'
1015            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1016        eWalkCode += '''
1017        for (unsigned i = 0; i < eCount; i++) {
1018            %(readOp)s
1019            %(declDest)s
1020            %(readDest)s
1021            %(op)s
1022            %(writeDest)s
1023        }
1024        ''' % { "readOp" : readOpCode,
1025                "declDest" : declDest,
1026                "readDest" : readDestCode,
1027                "op" : op,
1028                "writeDest" : writeDestCode }
1029        for reg in range(rCount):
1030            eWalkCode += '''
1031            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1032            ''' % { "reg" : reg }
1033        iop = InstObjParams(name, Name,
1034                            "RegRegImmOp",
1035                            { "code": eWalkCode,
1036                              "r_count": rCount,
1037                              "predicate_test": predicateTest }, [])
1038        header_output += NeonRegRegImmOpDeclare.subst(iop)
1039        exec_output += NeonEqualRegExecute.subst(iop)
1040        for type in types:
1041            substDict = { "targs" : type,
1042                          "class_name" : Name }
1043            exec_output += NeonExecDeclare.subst(substDict)
1044
1045    def twoRegNarrowShiftInst(name, Name, types, op, readDest=False):
1046        global header_output, exec_output
1047        eWalkCode = simdEnabledCheckCode + '''
1048        BigRegVect srcReg1;
1049        RegVect destReg;
1050        '''
1051        for reg in range(4):
1052            eWalkCode += '''
1053                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1054            ''' % { "reg" : reg }
1055        if readDest:
1056            for reg in range(2):
1057                eWalkCode += '''
1058                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1059                ''' % { "reg" : reg }
1060        readDestCode = ''
1061        if readDest:
1062            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1063        eWalkCode += '''
1064        for (unsigned i = 0; i < eCount; i++) {
1065            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1066            Element destElem;
1067            %(readDest)s
1068            %(op)s
1069            destReg.elements[i] = htog(destElem);
1070        }
1071        ''' % { "op" : op, "readDest" : readDestCode }
1072        for reg in range(2):
1073            eWalkCode += '''
1074            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1075            ''' % { "reg" : reg }
1076        iop = InstObjParams(name, Name,
1077                            "RegRegImmOp",
1078                            { "code": eWalkCode,
1079                              "r_count": 2,
1080                              "predicate_test": predicateTest }, [])
1081        header_output += NeonRegRegImmOpDeclare.subst(iop)
1082        exec_output += NeonUnequalRegExecute.subst(iop)
1083        for type in types:
1084            substDict = { "targs" : type,
1085                          "class_name" : Name }
1086            exec_output += NeonExecDeclare.subst(substDict)
1087
1088    def twoRegLongShiftInst(name, Name, types, op, readDest=False):
1089        global header_output, exec_output
1090        eWalkCode = simdEnabledCheckCode + '''
1091        RegVect srcReg1;
1092        BigRegVect destReg;
1093        '''
1094        for reg in range(2):
1095            eWalkCode += '''
1096                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1097            ''' % { "reg" : reg }
1098        if readDest:
1099            for reg in range(4):
1100                eWalkCode += '''
1101                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1102                ''' % { "reg" : reg }
1103        readDestCode = ''
1104        if readDest:
1105            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1106        eWalkCode += '''
1107        for (unsigned i = 0; i < eCount; i++) {
1108            Element srcElem1 = gtoh(srcReg1.elements[i]);
1109            BigElement destElem;
1110            %(readDest)s
1111            %(op)s
1112            destReg.elements[i] = htog(destElem);
1113        }
1114        ''' % { "op" : op, "readDest" : readDestCode }
1115        for reg in range(4):
1116            eWalkCode += '''
1117            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1118            ''' % { "reg" : reg }
1119        iop = InstObjParams(name, Name,
1120                            "RegRegImmOp",
1121                            { "code": eWalkCode,
1122                              "r_count": 2,
1123                              "predicate_test": predicateTest }, [])
1124        header_output += NeonRegRegImmOpDeclare.subst(iop)
1125        exec_output += NeonUnequalRegExecute.subst(iop)
1126        for type in types:
1127            substDict = { "targs" : type,
1128                          "class_name" : Name }
1129            exec_output += NeonExecDeclare.subst(substDict)
1130
1131    def twoRegMiscInst(name, Name, types, rCount, op, readDest=False):
1132        global header_output, exec_output
1133        eWalkCode = simdEnabledCheckCode + '''
1134        RegVect srcReg1, destReg;
1135        '''
1136        for reg in range(rCount):
1137            eWalkCode += '''
1138                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1139            ''' % { "reg" : reg }
1140            if readDest:
1141                eWalkCode += '''
1142                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1143                ''' % { "reg" : reg }
1144        readDestCode = ''
1145        if readDest:
1146            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1147        eWalkCode += '''
1148        for (unsigned i = 0; i < eCount; i++) {
1149            unsigned j = i;
1150            Element srcElem1 = gtoh(srcReg1.elements[i]);
1151            Element destElem;
1152            %(readDest)s
1153            %(op)s
1154            destReg.elements[j] = htog(destElem);
1155        }
1156        ''' % { "op" : op, "readDest" : readDestCode }
1157        for reg in range(rCount):
1158            eWalkCode += '''
1159            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1160            ''' % { "reg" : reg }
1161        iop = InstObjParams(name, Name,
1162                            "RegRegOp",
1163                            { "code": eWalkCode,
1164                              "r_count": rCount,
1165                              "predicate_test": predicateTest }, [])
1166        header_output += NeonRegRegOpDeclare.subst(iop)
1167        exec_output += NeonEqualRegExecute.subst(iop)
1168        for type in types:
1169            substDict = { "targs" : type,
1170                          "class_name" : Name }
1171            exec_output += NeonExecDeclare.subst(substDict)
1172
1173    def twoRegMiscScInst(name, Name, types, rCount, op, readDest=False):
1174        global header_output, exec_output
1175        eWalkCode = simdEnabledCheckCode + '''
1176        RegVect srcReg1, destReg;
1177        '''
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1181            ''' % { "reg" : reg }
1182            if readDest:
1183                eWalkCode += '''
1184                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1185                ''' % { "reg" : reg }
1186        readDestCode = ''
1187        if readDest:
1188            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1189        eWalkCode += '''
1190        for (unsigned i = 0; i < eCount; i++) {
1191            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1192            Element destElem;
1193            %(readDest)s
1194            %(op)s
1195            destReg.elements[i] = htog(destElem);
1196        }
1197        ''' % { "op" : op, "readDest" : readDestCode }
1198        for reg in range(rCount):
1199            eWalkCode += '''
1200            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1201            ''' % { "reg" : reg }
1202        iop = InstObjParams(name, Name,
1203                            "RegRegImmOp",
1204                            { "code": eWalkCode,
1205                              "r_count": rCount,
1206                              "predicate_test": predicateTest }, [])
1207        header_output += NeonRegRegImmOpDeclare.subst(iop)
1208        exec_output += NeonEqualRegExecute.subst(iop)
1209        for type in types:
1210            substDict = { "targs" : type,
1211                          "class_name" : Name }
1212            exec_output += NeonExecDeclare.subst(substDict)
1213
1214    def twoRegMiscScramble(name, Name, types, rCount, op, readDest=False):
1215        global header_output, exec_output
1216        eWalkCode = simdEnabledCheckCode + '''
1217        RegVect srcReg1, destReg;
1218        '''
1219        for reg in range(rCount):
1220            eWalkCode += '''
1221                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1222                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1223            ''' % { "reg" : reg }
1224            if readDest:
1225                eWalkCode += '''
1226                ''' % { "reg" : reg }
1227        readDestCode = ''
1228        if readDest:
1229            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1230        eWalkCode += op
1231        for reg in range(rCount):
1232            eWalkCode += '''
1233            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1234            FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1235            ''' % { "reg" : reg }
1236        iop = InstObjParams(name, Name,
1237                            "RegRegOp",
1238                            { "code": eWalkCode,
1239                              "r_count": rCount,
1240                              "predicate_test": predicateTest }, [])
1241        header_output += NeonRegRegOpDeclare.subst(iop)
1242        exec_output += NeonEqualRegExecute.subst(iop)
1243        for type in types:
1244            substDict = { "targs" : type,
1245                          "class_name" : Name }
1246            exec_output += NeonExecDeclare.subst(substDict)
1247
1248    def twoRegMiscInstFp(name, Name, types, rCount, op,
1249            readDest=False, toInt=False):
1250        global header_output, exec_output
1251        eWalkCode = simdEnabledCheckCode + '''
1252        typedef FloatReg FloatVect[rCount];
1253        FloatVect srcRegs1;
1254        '''
1255        if toInt:
1256            eWalkCode += 'RegVect destRegs;\n'
1257        else:
1258            eWalkCode += 'FloatVect destRegs;\n'
1259        for reg in range(rCount):
1260            eWalkCode += '''
1261                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1262            ''' % { "reg" : reg }
1263            if readDest:
1264                if toInt:
1265                    eWalkCode += '''
1266                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1267                    ''' % { "reg" : reg }
1268                else:
1269                    eWalkCode += '''
1270                        destRegs[%(reg)d] = FpDestP%(reg)d;
1271                    ''' % { "reg" : reg }
1272        readDestCode = ''
1273        if readDest:
1274            readDestCode = 'destReg = destRegs[i];'
1275        destType = 'FloatReg'
1276        writeDest = 'destRegs[r] = destReg;'
1277        if toInt:
1278            destType = 'FloatRegBits'
1279            writeDest = 'destRegs.regs[r] = destReg;'
1280        eWalkCode += '''
1281        for (unsigned r = 0; r < rCount; r++) {
1282            FloatReg srcReg1 = srcRegs1[r];
1283            %(destType)s destReg;
1284            %(readDest)s
1285            %(op)s
1286            %(writeDest)s
1287        }
1288        ''' % { "op" : op,
1289                "readDest" : readDestCode,
1290                "destType" : destType,
1291                "writeDest" : writeDest }
1292        for reg in range(rCount):
1293            if toInt:
1294                eWalkCode += '''
1295                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1296                ''' % { "reg" : reg }
1297            else:
1298                eWalkCode += '''
1299                FpDestP%(reg)d = destRegs[%(reg)d];
1300                ''' % { "reg" : reg }
1301        iop = InstObjParams(name, Name,
1302                            "FpRegRegOp",
1303                            { "code": eWalkCode,
1304                              "r_count": rCount,
1305                              "predicate_test": predicateTest }, [])
1306        header_output += NeonRegRegOpDeclare.subst(iop)
1307        exec_output += NeonEqualRegExecute.subst(iop)
1308        for type in types:
1309            substDict = { "targs" : type,
1310                          "class_name" : Name }
1311            exec_output += NeonExecDeclare.subst(substDict)
1312
1313    def twoRegCondenseInst(name, Name, types, rCount, op, readDest=False):
1314        global header_output, exec_output
1315        eWalkCode = simdEnabledCheckCode + '''
1316        RegVect srcRegs;
1317        BigRegVect destReg;
1318        '''
1319        for reg in range(rCount):
1320            eWalkCode += '''
1321                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1322            ''' % { "reg" : reg }
1323            if readDest:
1324                eWalkCode += '''
1325                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1326                ''' % { "reg" : reg }
1327        readDestCode = ''
1328        if readDest:
1329            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1330        eWalkCode += '''
1331        for (unsigned i = 0; i < eCount / 2; i++) {
1332            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1333            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1334            BigElement destElem;
1335            %(readDest)s
1336            %(op)s
1337            destReg.elements[i] = htog(destElem);
1338        }
1339        ''' % { "op" : op, "readDest" : readDestCode }
1340        for reg in range(rCount):
1341            eWalkCode += '''
1342            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1343            ''' % { "reg" : reg }
1344        iop = InstObjParams(name, Name,
1345                            "RegRegOp",
1346                            { "code": eWalkCode,
1347                              "r_count": rCount,
1348                              "predicate_test": predicateTest }, [])
1349        header_output += NeonRegRegOpDeclare.subst(iop)
1350        exec_output += NeonUnequalRegExecute.subst(iop)
1351        for type in types:
1352            substDict = { "targs" : type,
1353                          "class_name" : Name }
1354            exec_output += NeonExecDeclare.subst(substDict)
1355
1356    def twoRegNarrowMiscInst(name, Name, types, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = simdEnabledCheckCode + '''
1359        BigRegVect srcReg1;
1360        RegVect destReg;
1361        '''
1362        for reg in range(4):
1363            eWalkCode += '''
1364                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1365            ''' % { "reg" : reg }
1366        if readDest:
1367            for reg in range(2):
1368                eWalkCode += '''
1369                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1370                ''' % { "reg" : reg }
1371        readDestCode = ''
1372        if readDest:
1373            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1374        eWalkCode += '''
1375        for (unsigned i = 0; i < eCount; i++) {
1376            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1377            Element destElem;
1378            %(readDest)s
1379            %(op)s
1380            destReg.elements[i] = htog(destElem);
1381        }
1382        ''' % { "op" : op, "readDest" : readDestCode }
1383        for reg in range(2):
1384            eWalkCode += '''
1385            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1386            ''' % { "reg" : reg }
1387        iop = InstObjParams(name, Name,
1388                            "RegRegOp",
1389                            { "code": eWalkCode,
1390                              "r_count": 2,
1391                              "predicate_test": predicateTest }, [])
1392        header_output += NeonRegRegOpDeclare.subst(iop)
1393        exec_output += NeonUnequalRegExecute.subst(iop)
1394        for type in types:
1395            substDict = { "targs" : type,
1396                          "class_name" : Name }
1397            exec_output += NeonExecDeclare.subst(substDict)
1398
1399    def oneRegImmInst(name, Name, types, rCount, op, readDest=False):
1400        global header_output, exec_output
1401        eWalkCode = simdEnabledCheckCode + '''
1402        RegVect destReg;
1403        '''
1404        if readDest:
1405            for reg in range(rCount):
1406                eWalkCode += '''
1407                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1408                ''' % { "reg" : reg }
1409        readDestCode = ''
1410        if readDest:
1411            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1412        eWalkCode += '''
1413        for (unsigned i = 0; i < eCount; i++) {
1414            Element destElem;
1415            %(readDest)s
1416            %(op)s
1417            destReg.elements[i] = htog(destElem);
1418        }
1419        ''' % { "op" : op, "readDest" : readDestCode }
1420        for reg in range(rCount):
1421            eWalkCode += '''
1422            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1423            ''' % { "reg" : reg }
1424        iop = InstObjParams(name, Name,
1425                            "RegImmOp",
1426                            { "code": eWalkCode,
1427                              "r_count": rCount,
1428                              "predicate_test": predicateTest }, [])
1429        header_output += NeonRegImmOpDeclare.subst(iop)
1430        exec_output += NeonEqualRegExecute.subst(iop)
1431        for type in types:
1432            substDict = { "targs" : type,
1433                          "class_name" : Name }
1434            exec_output += NeonExecDeclare.subst(substDict)
1435
1436    def twoRegLongMiscInst(name, Name, types, op, readDest=False):
1437        global header_output, exec_output
1438        eWalkCode = simdEnabledCheckCode + '''
1439        RegVect srcReg1;
1440        BigRegVect destReg;
1441        '''
1442        for reg in range(2):
1443            eWalkCode += '''
1444                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1445            ''' % { "reg" : reg }
1446        if readDest:
1447            for reg in range(4):
1448                eWalkCode += '''
1449                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1450                ''' % { "reg" : reg }
1451        readDestCode = ''
1452        if readDest:
1453            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1454        eWalkCode += '''
1455        for (unsigned i = 0; i < eCount; i++) {
1456            Element srcElem1 = gtoh(srcReg1.elements[i]);
1457            BigElement destElem;
1458            %(readDest)s
1459            %(op)s
1460            destReg.elements[i] = htog(destElem);
1461        }
1462        ''' % { "op" : op, "readDest" : readDestCode }
1463        for reg in range(4):
1464            eWalkCode += '''
1465            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1466            ''' % { "reg" : reg }
1467        iop = InstObjParams(name, Name,
1468                            "RegRegOp",
1469                            { "code": eWalkCode,
1470                              "r_count": 2,
1471                              "predicate_test": predicateTest }, [])
1472        header_output += NeonRegRegOpDeclare.subst(iop)
1473        exec_output += NeonUnequalRegExecute.subst(iop)
1474        for type in types:
1475            substDict = { "targs" : type,
1476                          "class_name" : Name }
1477            exec_output += NeonExecDeclare.subst(substDict)
1478
1479    vhaddCode = '''
1480        Element carryBit =
1481            (((unsigned)srcElem1 & 0x1) +
1482             ((unsigned)srcElem2 & 0x1)) >> 1;
1483        // Use division instead of a shift to ensure the sign extension works
1484        // right. The compiler will figure out if it can be a shift. Mask the
1485        // inputs so they get truncated correctly.
1486        destElem = (((srcElem1 & ~(Element)1) / 2) +
1487                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1488    '''
1489    threeEqualRegInst("vhadd", "VhaddD", allTypes, 2, vhaddCode)
1490    threeEqualRegInst("vhadd", "VhaddQ", allTypes, 4, vhaddCode)
1491
1492    vrhaddCode = '''
1493        Element carryBit =
1494            (((unsigned)srcElem1 & 0x1) +
1495             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1496        // Use division instead of a shift to ensure the sign extension works
1497        // right. The compiler will figure out if it can be a shift. Mask the
1498        // inputs so they get truncated correctly.
1499        destElem = (((srcElem1 & ~(Element)1) / 2) +
1500                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1501    '''
1502    threeEqualRegInst("vrhadd", "VrhaddD", allTypes, 2, vrhaddCode)
1503    threeEqualRegInst("vrhadd", "VrhaddQ", allTypes, 4, vrhaddCode)
1504
1505    vhsubCode = '''
1506        Element barrowBit =
1507            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1508        // Use division instead of a shift to ensure the sign extension works
1509        // right. The compiler will figure out if it can be a shift. Mask the
1510        // inputs so they get truncated correctly.
1511        destElem = (((srcElem1 & ~(Element)1) / 2) -
1512                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1513    '''
1514    threeEqualRegInst("vhsub", "VhsubD", allTypes, 2, vhsubCode)
1515    threeEqualRegInst("vhsub", "VhsubQ", allTypes, 4, vhsubCode)
1516
1517    vandCode = '''
1518        destElem = srcElem1 & srcElem2;
1519    '''
1520    threeEqualRegInst("vand", "VandD", unsignedTypes, 2, vandCode)
1521    threeEqualRegInst("vand", "VandQ", unsignedTypes, 4, vandCode)
1522
1523    vbicCode = '''
1524        destElem = srcElem1 & ~srcElem2;
1525    '''
1526    threeEqualRegInst("vbic", "VbicD", unsignedTypes, 2, vbicCode)
1527    threeEqualRegInst("vbic", "VbicQ", unsignedTypes, 4, vbicCode)
1528
1529    vorrCode = '''
1530        destElem = srcElem1 | srcElem2;
1531    '''
1532    threeEqualRegInst("vorr", "VorrD", unsignedTypes, 2, vorrCode)
1533    threeEqualRegInst("vorr", "VorrQ", unsignedTypes, 4, vorrCode)
1534
1535    threeEqualRegInst("vmov", "VmovD", unsignedTypes, 2, vorrCode)
1536    threeEqualRegInst("vmov", "VmovQ", unsignedTypes, 4, vorrCode)
1537
1538    vornCode = '''
1539        destElem = srcElem1 | ~srcElem2;
1540    '''
1541    threeEqualRegInst("vorn", "VornD", unsignedTypes, 2, vornCode)
1542    threeEqualRegInst("vorn", "VornQ", unsignedTypes, 4, vornCode)
1543
1544    veorCode = '''
1545        destElem = srcElem1 ^ srcElem2;
1546    '''
1547    threeEqualRegInst("veor", "VeorD", unsignedTypes, 2, veorCode)
1548    threeEqualRegInst("veor", "VeorQ", unsignedTypes, 4, veorCode)
1549
1550    vbifCode = '''
            // Bitwise insert if false: srcElem2 is the selector. Where a
            // selector bit is 1 the old destElem bit is kept, where it is 0
            // the srcElem1 bit is inserted.
            // destElem is read as an input here; the trailing True at the
            // registrations below presumably requests that the destination
            // be read first (confirm against threeEqualRegInst).
1551        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1552    '''
1553    threeEqualRegInst("vbif", "VbifD", unsignedTypes, 2, vbifCode, True)
1554    threeEqualRegInst("vbif", "VbifQ", unsignedTypes, 4, vbifCode, True)
1555    vbitCode = '''
            // Bitwise insert if true: srcElem2 is the selector. Where a
            // selector bit is 1 the srcElem1 bit is inserted, where it is 0
            // the old destElem bit is kept.
1556        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1557    '''
1558    threeEqualRegInst("vbit", "VbitD", unsignedTypes, 2, vbitCode, True)
1559    threeEqualRegInst("vbit", "VbitQ", unsignedTypes, 4, vbitCode, True)
1560    vbslCode = '''
            // Bitwise select: the old destElem is the selector. Where a
            // selector bit is 1 the srcElem1 bit is taken, where it is 0 the
            // srcElem2 bit is taken.
1561        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1562    '''
1563    threeEqualRegInst("vbsl", "VbslD", unsignedTypes, 2, vbslCode, True)
1564    threeEqualRegInst("vbsl", "VbslQ", unsignedTypes, 4, vbslCode, True)
1565
1566    vmaxCode = '''
            // Element-wise maximum. On a tie srcElem2 is selected, which is
            // the same value. The comparison uses the signedness of Element.
1567        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1568    '''
1569    threeEqualRegInst("vmax", "VmaxD", allTypes, 2, vmaxCode)
1570    threeEqualRegInst("vmax", "VmaxQ", allTypes, 4, vmaxCode)
1571
1572    vminCode = '''
            // Element-wise minimum. On a tie srcElem2 is selected, which is
            // the same value. The comparison uses the signedness of Element.
1573        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1574    '''
1575    threeEqualRegInst("vmin", "VminD", allTypes, 2, vminCode)
1576    threeEqualRegInst("vmin", "VminQ", allTypes, 4, vminCode)
1577
1578    vaddCode = '''
            // Plain element-wise add; the result is truncated to Element on
            // assignment. The snippet is registered for vadd and, with
            // pairwise=True, for vpadd as well.
1579        destElem = srcElem1 + srcElem2;
1580    '''
1581    threeEqualRegInst("vadd", "NVaddD", unsignedTypes, 2, vaddCode)
1582    threeEqualRegInst("vadd", "NVaddQ", unsignedTypes, 4, vaddCode)
1583
1584    threeEqualRegInst("vpadd", "NVpaddD", unsignedTypes,
1585                      2, vaddCode, pairwise=True)
1586    threeEqualRegInst("vpadd", "NVpaddQ", unsignedTypes,
1587                      4, vaddCode, pairwise=True)
1588    vaddlwCode = '''
            // Widening add: both operands are converted to BigElement before
            // the addition. Shared by the long (vaddl) and wide (vaddw)
            // registrations below.
1589        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1590    '''
1591    threeRegLongInst("vaddl", "Vaddl", smallTypes, vaddlwCode)
1592    threeRegWideInst("vaddw", "Vaddw", smallTypes, vaddlwCode)
1593    vaddhnCode = '''
            // Add and keep the high half: the BigElement sum is shifted
            // right by the bit width of Element before being stored.
1594        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1595                   (sizeof(Element) * 8);
1596    '''
1597    threeRegNarrowInst("vaddhn", "Vaddhn", smallTypes, vaddhnCode)
1598    vraddhnCode = '''
            // Rounding variant of the snippet above: a constant with only
            // the top bit of the discarded low half set is added to the sum
            // before the shift.
1599        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1600                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1601                   (sizeof(Element) * 8);
1602    '''
1603    threeRegNarrowInst("vraddhn", "Vraddhn", smallTypes, vraddhnCode)
1604
1605    vsubCode = '''
            // Plain element-wise subtract; the result is truncated to
            // Element on assignment.
1606        destElem = srcElem1 - srcElem2;
1607    '''
1608    threeEqualRegInst("vsub", "NVsubD", unsignedTypes, 2, vsubCode)
1609    threeEqualRegInst("vsub", "NVsubQ", unsignedTypes, 4, vsubCode)
1610    vsublwCode = '''
            // Widening subtract: both operands are converted to BigElement
            // before the subtraction. Shared by the long (vsubl) and wide
            // (vsubw) registrations below.
1611        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1612    '''
1613    threeRegLongInst("vsubl", "Vsubl", smallTypes, vsublwCode)
1614    threeRegWideInst("vsubw", "Vsubw", smallTypes, vsublwCode)
1615
1616    vqaddUCode = '''
            // Unsigned saturating add. The sum is computed in Element, so an
            // overflow shows up as a result smaller than one of the inputs.
1617        destElem = srcElem1 + srcElem2;
1618        FPSCR fpscr = (FPSCR)Fpscr;
1619        if (destElem < srcElem1 || destElem < srcElem2) {
                // Clamp to all ones and record the saturation in fpscr.qc.
1620            destElem = (Element)(-1);
1621            fpscr.qc = 1;
1622        }
1623        Fpscr = fpscr;
1624    '''
1625    threeEqualRegInst("vqadd", "VqaddUD", unsignedTypes, 2, vqaddUCode)
1626    threeEqualRegInst("vqadd", "VqaddUQ", unsignedTypes, 4, vqaddUCode)
1627    vsubhnCode = '''
            // Subtract and keep the high half: the BigElement difference is
            // shifted right by the bit width of Element before being stored.
1628        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1629                   (sizeof(Element) * 8);
1630    '''
1631    threeRegNarrowInst("vsubhn", "Vsubhn", smallTypes, vsubhnCode)
1632    vrsubhnCode = '''
            // Rounding variant of the snippet above: a constant with only
            // the top bit of the discarded low half set is added to the
            // difference before the shift.
1633        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1634                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1635                   (sizeof(Element) * 8);
1636    '''
1637    threeRegNarrowInst("vrsubhn", "Vrsubhn", smallTypes, vrsubhnCode)
1638
1639    vqaddSCode = '''
            // Signed saturating add. The wrapped sum is computed first and
            // overflow is then detected from the operand and result signs.
1640        destElem = srcElem1 + srcElem2;
1641        FPSCR fpscr = (FPSCR)Fpscr;
1642        bool negDest = (destElem < 0);
1643        bool negSrc1 = (srcElem1 < 0);
1644        bool negSrc2 = (srcElem2 < 0);
            // Overflow: both sources share a sign and the sum has the other.
1645        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
                // Start from the value with only the top bit set (the most
                // negative value). If the wrapped sum went negative the
                // inputs were non-negative, so step down by one to reach
                // the most positive value instead.
1646            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1647            if (negDest)
1648                destElem -= 1;
1649            fpscr.qc = 1;
1650        }
1651        Fpscr = fpscr;
1652    '''
1653    threeEqualRegInst("vqadd", "VqaddSD", signedTypes, 2, vqaddSCode)
1654    threeEqualRegInst("vqadd", "VqaddSQ", signedTypes, 4, vqaddSCode)
1655
1656    vqsubUCode = '''
            // Unsigned saturating subtract. A borrow shows up as a wrapped
            // difference that is larger than the minuend.
1657        destElem = srcElem1 - srcElem2;
1658        FPSCR fpscr = (FPSCR)Fpscr;
1659        if (destElem > srcElem1) {
                // Clamp to zero and record the saturation in fpscr.qc.
1660            destElem = 0;
1661            fpscr.qc = 1;
1662        }
1663        Fpscr = fpscr;
1664    '''
1665    threeEqualRegInst("vqsub", "VqsubUD", unsignedTypes, 2, vqsubUCode)
1666    threeEqualRegInst("vqsub", "VqsubUQ", unsignedTypes, 4, vqsubUCode)
1667
1668    vqsubSCode = '''
            // Signed saturating subtract. The wrapped difference is computed
            // first and overflow is then detected from the signs.
1669        destElem = srcElem1 - srcElem2;
1670        FPSCR fpscr = (FPSCR)Fpscr;
1671        bool negDest = (destElem < 0);
1672        bool negSrc1 = (srcElem1 < 0);
1673        bool posSrc2 = (srcElem2 >= 0);
            // Overflow: the sources have opposite signs (negSrc1 equals
            // posSrc2) and the result sign differs from the first source.
1674        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
                // Start from the value with only the top bit set (the most
                // negative value). If the wrapped difference went negative
                // the first source was non-negative, so step down by one to
                // reach the most positive value instead.
1675            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1676            if (negDest)
1677                destElem -= 1;
1678            fpscr.qc = 1;
1679        }
1680        Fpscr = fpscr;
1681    '''
1682    threeEqualRegInst("vqsub", "VqsubSD", signedTypes, 2, vqsubSCode)
1683    threeEqualRegInst("vqsub", "VqsubSQ", signedTypes, 4, vqsubSCode)
1684
1685    vcgtCode = '''
            // Compare greater-than: all ones when true, zero when false.
            // The comparison uses the signedness of Element.
1686        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1687    '''
1688    threeEqualRegInst("vcgt", "VcgtD", allTypes, 2, vcgtCode)
1689    threeEqualRegInst("vcgt", "VcgtQ", allTypes, 4, vcgtCode)
1690
1691    vcgeCode = '''
            // Compare greater-than-or-equal: all ones when true, zero when
            // false. The comparison uses the signedness of Element.
1692        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1693    '''
1694    threeEqualRegInst("vcge", "VcgeD", allTypes, 2, vcgeCode)
1695    threeEqualRegInst("vcge", "VcgeQ", allTypes, 4, vcgeCode)
1696
1697    vceqCode = '''
            // Compare equal: all ones when true, zero when false. Only
            // registered for unsignedTypes.
1698        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1699    '''
1700    threeEqualRegInst("vceq", "VceqD", unsignedTypes, 2, vceqCode)
1701    threeEqualRegInst("vceq", "VceqQ", unsignedTypes, 4, vceqCode)
1702
1703    vshlCode = '''
            // Shift by a per-element register amount. The amount is the low
            // byte of srcElem2 read as a signed value: negative amounts
            // shift right, non-negative amounts shift left. It is held in an
            // int16_t so that negating -128 still fits.
1704        int16_t shiftAmt = (int8_t)srcElem2;
1705        if (shiftAmt < 0) {
1706            shiftAmt = -shiftAmt;
1707            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out. shiftAmt is clamped to the
                    // width minus one so that the fix-up below ORs in a mask
                    // covering the whole element.
1708                shiftAmt = sizeof(Element) * 8 - 1;
1709                destElem = 0;
1710            } else {
1711                destElem = (srcElem1 >> shiftAmt);
1712            }
1713            // Make sure the right shift sign extended when it should.
                // ltz() is defined outside this view; presumably it tests
                // for a negative value and is false for unsigned types.
1714            if (ltz(srcElem1) && !ltz(destElem)) {
1715                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1716                                             1 - shiftAmt));
1717            }
1718        } else {
                // Left shift; amounts of the element width or more give 0.
1719            if (shiftAmt >= sizeof(Element) * 8) {
1720                destElem = 0;
1721            } else {
1722                destElem = srcElem1 << shiftAmt;
1723            }
1724        }
1725    '''
1726    threeEqualRegInst("vshl", "VshlD", allTypes, 2, vshlCode)
1727    threeEqualRegInst("vshl", "VshlQ", allTypes, 4, vshlCode)
1728
1729    vrshlCode = '''
            // Rounding shift by a per-element register amount. The amount is
            // the low byte of srcElem2 read as a signed value: negative
            // amounts shift right with rounding, positive amounts shift
            // left, zero copies the source.
1730        int16_t shiftAmt = (int8_t)srcElem2;
1731        if (shiftAmt < 0) {
1732            shiftAmt = -shiftAmt;
                // rBit is the rounding increment: the last bit shifted out
                // (bit shiftAmt - 1) when that bit exists in the element.
1733            Element rBit = 0;
1734            if (shiftAmt <= sizeof(Element) * 8)
1735                rBit = bits(srcElem1, shiftAmt - 1);
                // For larger shifts of a negative value rBit is forced to 1.
                // Added to the all-ones value produced by the fix-up below,
                // this yields a final result of 0.
1736            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1737                rBit = 1;
1738            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out; clamp shiftAmt so the fix-up
                    // mask covers the whole element.
1739                shiftAmt = sizeof(Element) * 8 - 1;
1740                destElem = 0;
1741            } else {
1742                destElem = (srcElem1 >> shiftAmt);
1743            }
1744            // Make sure the right shift sign extended when it should.
                // ltz() is defined outside this view; presumably it tests
                // for a negative value and is false for unsigned types.
1745            if (ltz(srcElem1) && !ltz(destElem)) {
1746                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1747                                             1 - shiftAmt));
1748            }
1749            destElem += rBit;
1750        } else if (shiftAmt > 0) {
                // Left shift; amounts of the element width or more give 0.
1751            if (shiftAmt >= sizeof(Element) * 8) {
1752                destElem = 0;
1753            } else {
1754                destElem = srcElem1 << shiftAmt;
1755            }
1756        } else {
1757            destElem = srcElem1;
1758        }
1759    '''
1760    threeEqualRegInst("vrshl", "VrshlD", allTypes, 2, vrshlCode)
1761    threeEqualRegInst("vrshl", "VrshlQ", allTypes, 4, vrshlCode)
1762
1763    vqshlUCode = '''
            // Unsigned saturating shift by a per-element register amount
            // (low byte of srcElem2, signed). Negative amounts shift right,
            // positive amounts shift left with saturation, zero copies the
            // source.
1764        int16_t shiftAmt = (int8_t)srcElem2;
1765        FPSCR fpscr = (FPSCR)Fpscr;
1766        if (shiftAmt < 0) {
1767            shiftAmt = -shiftAmt;
1768            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out of an unsigned value. The
                    // clamped shiftAmt is not used again on this path.
1769                shiftAmt = sizeof(Element) * 8 - 1;
1770                destElem = 0;
1771            } else {
1772                destElem = (srcElem1 >> shiftAmt);
1773            }
1774        } else if (shiftAmt > 0) {
1775            if (shiftAmt >= sizeof(Element) * 8) {
                    // Any non-zero value overflows when shifted this far.
1776                if (srcElem1 != 0) {
1777                    destElem = mask(sizeof(Element) * 8);
1778                    fpscr.qc = 1;
1779                } else {
1780                    destElem = 0;
1781                }
1782            } else {
                    // The top shiftAmt bits would be shifted out; if any of
                    // them is set the result saturates to all ones.
1783                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1784                            sizeof(Element) * 8 - shiftAmt)) {
1785                    destElem = mask(sizeof(Element) * 8);
1786                    fpscr.qc = 1;
1787                } else {
1788                    destElem = srcElem1 << shiftAmt;
1789                }
1790            }
1791        } else {
1792            destElem = srcElem1;
1793        }
1794        Fpscr = fpscr;
1795    '''
1796    threeEqualRegInst("vqshl", "VqshlUD", unsignedTypes, 2, vqshlUCode)
1797    threeEqualRegInst("vqshl", "VqshlUQ", unsignedTypes, 4, vqshlUCode)
1798
1799    vqshlSCode = '''
            // Signed saturating shift by a per-element register amount (low
            // byte of srcElem2, signed). Negative amounts shift right,
            // positive amounts shift left with saturation, zero copies the
            // source.
1800        int16_t shiftAmt = (int8_t)srcElem2;
1801        FPSCR fpscr = (FPSCR)Fpscr;
1802        if (shiftAmt < 0) {
1803            shiftAmt = -shiftAmt;
1804            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out; clamp shiftAmt so the fix-up
                    // mask below covers the whole element.
1805                shiftAmt = sizeof(Element) * 8 - 1;
1806                destElem = 0;
1807            } else {
1808                destElem = (srcElem1 >> shiftAmt);
1809            }
1810            // Make sure the right shift sign extended when it should.
1811            if (srcElem1 < 0 && destElem >= 0) {
1812                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1813                                             1 - shiftAmt));
1814            }
1815        } else if (shiftAmt > 0) {
1816            bool sat = false;
1817            if (shiftAmt >= sizeof(Element) * 8) {
                    // Any non-zero value overflows when shifted this far.
1818                if (srcElem1 != 0)
1819                    sat = true;
1820                else
1821                    destElem = 0;
1822            } else {
                    // The top shiftAmt + 1 bits (the bits shifted out plus
                    // the bit that becomes the new sign) must all equal the
                    // original sign, otherwise the shift overflows.
1823                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1824                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1825                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1826                    sat = true;
1827                } else {
1828                    destElem = srcElem1 << shiftAmt;
1829                }
1830            }
1831            if (sat) {
                    // Saturate to all ones below the top bit (most positive)
                    // or, for a negative source, to its complement.
1832                fpscr.qc = 1;
1833                destElem = mask(sizeof(Element) * 8 - 1);
1834                if (srcElem1 < 0)
1835                    destElem = ~destElem;
1836            }
1837        } else {
1838            destElem = srcElem1;
1839        }
1840        Fpscr = fpscr;
1841    '''
1842    threeEqualRegInst("vqshl", "VqshlSD", signedTypes, 2, vqshlSCode)
1843    threeEqualRegInst("vqshl", "VqshlSQ", signedTypes, 4, vqshlSCode)
1844
1845    vqrshlUCode = '''
            // Unsigned saturating rounding shift by a per-element register
            // amount (low byte of srcElem2, signed). Negative amounts shift
            // right with rounding, positive amounts shift left with
            // saturation, zero copies the source.
1846        int16_t shiftAmt = (int8_t)srcElem2;
1847        FPSCR fpscr = (FPSCR)Fpscr;
1848        if (shiftAmt < 0) {
1849            shiftAmt = -shiftAmt;
                // rBit is the rounding increment: the last bit shifted out
                // (bit shiftAmt - 1) when that bit exists in the element.
1850            Element rBit = 0;
1851            if (shiftAmt <= sizeof(Element) * 8)
1852                rBit = bits(srcElem1, shiftAmt - 1);
1853            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out of an unsigned value. The
                    // clamped shiftAmt is not used again on this path.
1854                shiftAmt = sizeof(Element) * 8 - 1;
1855                destElem = 0;
1856            } else {
1857                destElem = (srcElem1 >> shiftAmt);
1858            }
1859            destElem += rBit;
1860        } else if (shiftAmt > 0) {
1861            if (shiftAmt >= sizeof(Element) * 8) {
                    // Any non-zero value overflows when shifted this far.
1862                if (srcElem1 != 0) {
1863                    destElem = mask(sizeof(Element) * 8);
1864                    fpscr.qc = 1;
1865                } else {
1866                    destElem = 0;
1867                }
1868            } else {
                    // The top shiftAmt bits would be shifted out; if any of
                    // them is set the result saturates to all ones.
1869                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1870                            sizeof(Element) * 8 - shiftAmt)) {
1871                    destElem = mask(sizeof(Element) * 8);
1872                    fpscr.qc = 1;
1873                } else {
1874                    destElem = srcElem1 << shiftAmt;
1875                }
1876            }
1877        } else {
                // A zero shift is handled on its own, as in vqshlUCode.
                // Sending it through the left shift path would query
                // bits() with a low index equal to the element width,
                // which is above the high index.
                destElem = srcElem1;
            }
1878        Fpscr = fpscr;
1879    '''
1880    threeEqualRegInst("vqrshl", "VqrshlUD", unsignedTypes, 2, vqrshlUCode)
1881    threeEqualRegInst("vqrshl", "VqrshlUQ", unsignedTypes, 4, vqrshlUCode)
1882
1883    vqrshlSCode = '''
            // Signed saturating rounding shift by a per-element register
            // amount (low byte of srcElem2, signed). Negative amounts shift
            // right with rounding, positive amounts shift left with
            // saturation, zero copies the source.
1884        int16_t shiftAmt = (int8_t)srcElem2;
1885        FPSCR fpscr = (FPSCR)Fpscr;
1886        if (shiftAmt < 0) {
1887            shiftAmt = -shiftAmt;
                // rBit is the rounding increment: the last bit shifted out
                // (bit shiftAmt - 1) when that bit exists in the element.
1888            Element rBit = 0;
1889            if (shiftAmt <= sizeof(Element) * 8)
1890                rBit = bits(srcElem1, shiftAmt - 1);
                // For larger shifts of a negative value rBit is forced to 1.
                // Added to the all-ones value produced by the fix-up below,
                // this yields a final result of 0.
1891            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1892                rBit = 1;
1893            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out; clamp shiftAmt so the fix-up
                    // mask below covers the whole element.
1894                shiftAmt = sizeof(Element) * 8 - 1;
1895                destElem = 0;
1896            } else {
1897                destElem = (srcElem1 >> shiftAmt);
1898            }
1899            // Make sure the right shift sign extended when it should.
1900            if (srcElem1 < 0 && destElem >= 0) {
1901                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1902                                             1 - shiftAmt));
1903            }
1904            destElem += rBit;
1905        } else if (shiftAmt > 0) {
1906            bool sat = false;
1907            if (shiftAmt >= sizeof(Element) * 8) {
                    // Any non-zero value overflows when shifted this far.
1908                if (srcElem1 != 0)
1909                    sat = true;
1910                else
1911                    destElem = 0;
1912            } else {
                    // The top shiftAmt + 1 bits (the bits shifted out plus
                    // the bit that becomes the new sign) must all equal the
                    // original sign, otherwise the shift overflows.
1913                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1914                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1915                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1916                    sat = true;
1917                } else {
1918                    destElem = srcElem1 << shiftAmt;
1919                }
1920            }
1921            if (sat) {
                    // Saturate to all ones below the top bit (most positive)
                    // or, for a negative source, to its complement.
1922                fpscr.qc = 1;
1923                destElem = mask(sizeof(Element) * 8 - 1);
1924                if (srcElem1 < 0)
1925                    destElem = ~destElem;
1926            }
1927        } else {
1928            destElem = srcElem1;
1929        }
1930        Fpscr = fpscr;
1931    '''
1932    threeEqualRegInst("vqrshl", "VqrshlSD", signedTypes, 2, vqrshlSCode)
1933    threeEqualRegInst("vqrshl", "VqrshlSQ", signedTypes, 4, vqrshlSCode)
1934
1935    vabaCode = '''
            // Absolute difference and accumulate: the larger source minus
            // the smaller one is added to the existing destElem.
1936        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1937                                            (srcElem2 - srcElem1);
1938    '''
1939    threeEqualRegInst("vaba", "VabaD", allTypes, 2, vabaCode, True)
1940    threeEqualRegInst("vaba", "VabaQ", allTypes, 4, vabaCode, True)
1941    vabalCode = '''
            // Widening form of the snippet above: the operands are converted
            // to BigElement before subtracting, then accumulated.
1942        destElem += (srcElem1 > srcElem2) ?
1943            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1944            ((BigElement)srcElem2 - (BigElement)srcElem1);
1945    '''
1946    threeRegLongInst("vabal", "Vabal", smallTypes, vabalCode, True)
1947
1948    vabdCode = '''
            // Absolute difference: the larger source minus the smaller one.
1949        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1950                                           (srcElem2 - srcElem1);
1951    '''
1952    threeEqualRegInst("vabd", "VabdD", allTypes, 2, vabdCode)
1953    threeEqualRegInst("vabd", "VabdQ", allTypes, 4, vabdCode)
1954    vabdlCode = '''
            // Widening form of the snippet above: the operands are converted
            // to BigElement before subtracting.
1955        destElem = (srcElem1 > srcElem2) ?
1956            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1957            ((BigElement)srcElem2 - (BigElement)srcElem1);
1958    '''
1959    threeRegLongInst("vabdl", "Vabdl", smallTypes, vabdlCode)
1960
1961    vtstCode = '''
            // Test bits: all ones when the two sources share at least one
            // set bit, zero otherwise.
1962        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1963    '''
1964    threeEqualRegInst("vtst", "VtstD", unsignedTypes, 2, vtstCode)
1965    threeEqualRegInst("vtst", "VtstQ", unsignedTypes, 4, vtstCode)
1966
1967    vmulCode = '''
            // Element-wise multiply; the product is truncated to Element on
            // assignment.
1968        destElem = srcElem1 * srcElem2;
1969    '''
1970    threeEqualRegInst("vmul", "NVmulD", allTypes, 2, vmulCode)
1971    threeEqualRegInst("vmul", "NVmulQ", allTypes, 4, vmulCode)
1972    vmullCode = '''
            // Widening multiply: both operands are converted to BigElement
            // before multiplying.
1973        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1974    '''
1975    threeRegLongInst("vmull", "Vmull", smallTypes, vmullCode)
1976
1977    vmlaCode = '''
            // Multiply-accumulate: adds the product of the sources to the
            // existing destElem.
1978        destElem = destElem + srcElem1 * srcElem2;
1979    '''
1980    threeEqualRegInst("vmla", "NVmlaD", allTypes, 2, vmlaCode, True)
1981    threeEqualRegInst("vmla", "NVmlaQ", allTypes, 4, vmlaCode, True)
1982    vmlalCode = '''
            // Widening multiply-accumulate: the operands are converted to
            // BigElement before multiplying.
1983        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
1984    '''
1985    threeRegLongInst("vmlal", "Vmlal", smallTypes, vmlalCode, True)
1986
1987    vqdmlalCode = '''
            // Saturating doubling multiply-accumulate long. The doubled
            // product is formed and saturated first (midElem), then added
            // to destElem with a second saturation check. Either saturation
            // sets fpscr.qc.
1988        FPSCR fpscr = (FPSCR)Fpscr;
1989        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
            // maxNeg is the value with only the top bit of Element set.
1990        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The doubled product only exceeds the BigElement range when
            // both sources are maxNeg. Half of maxNeg times maxNeg doubles
            // to a quarter of the BigElement range, which is representable
            // and must not saturate, so only this one case is tested (the
            // same condition vqdmullCode uses).
1992        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
                // Most positive BigElement value.
1995            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
1996            fpscr.qc = 1;
1997        }
            // ltz() is defined outside this view; presumably it tests for
            // a negative value.
1998        bool negPreDest = ltz(destElem);
1999        destElem += midElem;
2000        bool negDest = ltz(destElem);
2001        bool negMid = ltz(midElem);
            // Accumulate overflow: both addends share a sign and the sum
            // has the other one.
2002        if (negPreDest == negMid && negMid != negDest) {
                // Most positive value, or its complement when the
                // accumulator was negative.
2003            destElem = mask(sizeof(BigElement) * 8 - 1);
2004            if (negPreDest)
2005                destElem = ~destElem;
2006            fpscr.qc = 1;
2007        }
2008        Fpscr = fpscr;
2009    '''
2010    threeRegLongInst("vqdmlal", "Vqdmlal", smallTypes, vqdmlalCode, True)
2011
2012    vqdmlslCode = '''
            // Saturating doubling multiply-subtract long. The doubled
            // product is formed and saturated first (midElem), then
            // subtracted from destElem with a second saturation check.
            // Either saturation sets fpscr.qc.
2013        FPSCR fpscr = (FPSCR)Fpscr;
2014        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
            // maxNeg is the value with only the top bit of Element set.
2015        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The doubled product only exceeds the BigElement range when
            // both sources are maxNeg. Half of maxNeg times maxNeg doubles
            // to a quarter of the BigElement range, which is representable
            // and must not saturate, so only this one case is tested (the
            // same condition vqdmullCode uses).
2017        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
                // Most positive BigElement value.
2020            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2021            fpscr.qc = 1;
2022        }
            // ltz() is defined outside this view; presumably it tests for
            // a negative value.
2023        bool negPreDest = ltz(destElem);
2024        destElem -= midElem;
2025        bool negDest = ltz(destElem);
            // Despite its name, posMid holds the sign of the negated
            // product, i.e. of the value effectively being added.
2026        bool posMid = ltz((BigElement)-midElem);
            // Overflow: the accumulator and the negated product share a
            // sign and the result has the other one.
2027        if (negPreDest == posMid && posMid != negDest) {
                // Most positive value, or its complement when the
                // accumulator was negative.
2028            destElem = mask(sizeof(BigElement) * 8 - 1);
2029            if (negPreDest)
2030                destElem = ~destElem;
2031            fpscr.qc = 1;
2032        }
2033        Fpscr = fpscr;
2034    '''
2035    threeRegLongInst("vqdmlsl", "Vqdmlsl", smallTypes, vqdmlslCode, True)
2036
2037    vqdmullCode = '''
            // Saturating doubling multiply long: twice the product of the
            // sources, computed in 64-bit arithmetic.
2038        FPSCR fpscr = (FPSCR)Fpscr;
2039        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
            // Saturate when both sources equal the value with only the top
            // bit of Element set. The result is then replaced by the
            // complement of that value shifted into the upper half, and
            // fpscr.qc is set.
2040        if (srcElem1 == srcElem2 &&
2041                srcElem1 == (Element)((Element)1 <<
2042                    (Element)(sizeof(Element) * 8 - 1))) {
2043            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2044            fpscr.qc = 1;
2045        }
2046        Fpscr = fpscr;
2047    '''
2048    threeRegLongInst("vqdmull", "Vqdmull", smallTypes, vqdmullCode)
2049
2050    vmlsCode = '''
            // Multiply-subtract: subtracts the product of the sources from
            // the existing destElem.
2051        destElem = destElem - srcElem1 * srcElem2;
2052    '''
2053    threeEqualRegInst("vmls", "NVmlsD", allTypes, 2, vmlsCode, True)
2054    threeEqualRegInst("vmls", "NVmlsQ", allTypes, 4, vmlsCode, True)
2055    vmlslCode = '''
            // Widening multiply-subtract: the operands are converted to
            // BigElement before multiplying.
2056        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2057    '''
2058    threeRegLongInst("vmlsl", "Vmlsl", smallTypes, vmlslCode, True)
2059
2060    vmulpCode = '''
            // Carry-less multiply: for every set bit j of srcElem2, the
            // value srcElem1 shifted left by j is XORed into the result
            // (XOR takes the place of addition). Registered under the vmul
            // mnemonic as NVmulpD / NVmulpQ.
2061        destElem = 0;
2062        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2063            if (bits(srcElem2, j))
2064                destElem ^= srcElem1 << j;
2065        }
2066    '''
2067    threeEqualRegInst("vmul", "NVmulpD", unsignedTypes, 2, vmulpCode)
2068    threeEqualRegInst("vmul", "NVmulpQ", unsignedTypes, 4, vmulpCode)
2069    vmullpCode = '''
            // Widening form of the snippet above: srcElem1 is converted to
            // BigElement before shifting, so no partial product is
            // truncated to Element.
2070        destElem = 0;
2071        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2072            if (bits(srcElem2, j))
2073                destElem ^= (BigElement)srcElem1 << j;
2074        }
2075    '''
2076    threeRegLongInst("vmull", "Vmullp", smallUnsignedTypes, vmullpCode)
2077
2078    threeEqualRegInst("vpmax", "VpmaxD", allTypes, 2, vmaxCode, pairwise=True)  # reuses the vmax snippet
2079    threeEqualRegInst("vpmax", "VpmaxQ", allTypes, 4, vmaxCode, pairwise=True)
2080
        # vpmin reuses the vmin snippet in the same way, with pairwise=True.
2081    threeEqualRegInst("vpmin", "VpminD", allTypes, 2, vminCode, pairwise=True)
2082    threeEqualRegInst("vpmin", "VpminQ", allTypes, 4, vminCode, pairwise=True)
2083
2084    vqdmulhCode = '''
            // Saturating doubling multiply returning the high half: twice
            // the product, computed in 64-bit arithmetic, shifted right by
            // the bit width of Element.
2085        FPSCR fpscr = (FPSCR)Fpscr;
2086        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2087                   (sizeof(Element) * 8);
            // Saturate when both sources equal the value with only the top
            // bit of Element set. The result becomes the complement of that
            // value and fpscr.qc is set.
2088        if (srcElem1 == srcElem2 &&
2089                srcElem1 == (Element)((Element)1 <<
2090                    (sizeof(Element) * 8 - 1))) {
2091            destElem = ~srcElem1;
2092            fpscr.qc = 1;
2093        }
2094        Fpscr = fpscr;
2095    '''
2096    threeEqualRegInst("vqdmulh", "VqdmulhD", smallSignedTypes, 2, vqdmulhCode)
2097    threeEqualRegInst("vqdmulh", "VqdmulhQ", smallSignedTypes, 4, vqdmulhCode)
2098
2099    vqrdmulhCode = '''
            // Saturating rounding doubling multiply returning the high
            // half: twice the product plus a rounding constant (the top bit
            // of the discarded low half), computed in 64-bit arithmetic and
            // shifted right by the bit width of Element.
2100        FPSCR fpscr = (FPSCR)Fpscr;
2101        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2102                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2103                   (sizeof(Element) * 8);
            // maxNeg is the value with only the top bit of Element set.
2104        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The result only leaves the Element range when both sources
            // are maxNeg; it is then one above the most positive value, so
            // it saturates to the most positive value. Half of maxNeg times
            // maxNeg gives a representable result and must not saturate,
            // so only this one case is tested (the same condition
            // vqdmulhCode uses).
2106        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
2110            destElem = mask(sizeof(Element) * 8 - 1);
2114            fpscr.qc = 1;
2115        }
2116        Fpscr = fpscr;
2117    '''
2118    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2119            smallSignedTypes, 2, vqrdmulhCode)
2120    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2121            smallSignedTypes, 4, vqrdmulhCode)
2122
2123    vmaxfpCode = '''
            // Floating point maximum. processNans() runs first and reports
            // through done whether it already produced the result. If not,
            // the maximum is computed through binaryOp() with fpMaxS.
            // processNans, binaryOp and flushToZero are defined outside
            // this view; their exact semantics are not visible here.
2124        FPSCR fpscr = (FPSCR)Fpscr;
2125        bool done;
2126        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2127        if (!done) {
2128            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2129                               true, true, VfpRoundNearest);
2130        } else if (flushToZero(srcReg1, srcReg2)) {
                // Only reached when processNans() handled the operation.
2131            fpscr.idc = 1;
2132        }
2133        Fpscr = fpscr;
2134    '''
2135    threeEqualRegInstFp("vmax", "VmaxDFp", ("float",), 2, vmaxfpCode)
2136    threeEqualRegInstFp("vmax", "VmaxQFp", ("float",), 4, vmaxfpCode)
2137
2138    vminfpCode = '''
            // Floating point minimum. processNans() runs first and reports
            // through done whether it already produced the result. If not,
            // the minimum is computed through binaryOp() with fpMinS.
            // processNans, binaryOp and flushToZero are defined outside
            // this view; their exact semantics are not visible here.
2139        FPSCR fpscr = (FPSCR)Fpscr;
2140        bool done;
2141        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2142        if (!done) {
2143            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2144                               true, true, VfpRoundNearest);
2145        } else if (flushToZero(srcReg1, srcReg2)) {
                // Only reached when processNans() handled the operation.
2146            fpscr.idc = 1;
2147        }
2148        Fpscr = fpscr;
2149    '''
2150    threeEqualRegInstFp("vmin", "VminDFp", ("float",), 2, vminfpCode)
2151    threeEqualRegInstFp("vmin", "VminQFp", ("float",), 4, vminfpCode)
2152
2153    threeEqualRegInstFp("vpmax", "VpmaxDFp", ("float",),  # reuses the fp vmax snippet
2154                        2, vmaxfpCode, pairwise=True)
2155    threeEqualRegInstFp("vpmax", "VpmaxQFp", ("float",),
2156                        4, vmaxfpCode, pairwise=True)
2157
        # vpmin reuses the fp vmin snippet in the same way, with pairwise=True.
2158    threeEqualRegInstFp("vpmin", "VpminDFp", ("float",),
2159                        2, vminfpCode, pairwise=True)
2160    threeEqualRegInstFp("vpmin", "VpminQFp", ("float",),
2161                        4, vminfpCode, pairwise=True)
2162
2163    vaddfpCode = '''
            // Floating point add through binaryOp() with fpAddS and
            // VfpRoundNearest. fpscr is passed to binaryOp() and written
            // back to Fpscr afterwards. Registered for vadd and, with
            // pairwise=True, for vpadd as well.
2164        FPSCR fpscr = Fpscr;
2165        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2166                           true, true, VfpRoundNearest);
2167        Fpscr = fpscr;
2168    '''
2169    threeEqualRegInstFp("vadd", "VaddDFp", ("float",), 2, vaddfpCode)
2170    threeEqualRegInstFp("vadd", "VaddQFp", ("float",), 4, vaddfpCode)
2171
2172    threeEqualRegInstFp("vpadd", "VpaddDFp", ("float",),
2173                        2, vaddfpCode, pairwise=True)
2174    threeEqualRegInstFp("vpadd", "VpaddQFp", ("float",),
2175                        4, vaddfpCode, pairwise=True)
2176
2177    vsubfpCode = '''
            // Floating point subtract (srcReg1 minus srcReg2 operand order)
            // through binaryOp() with fpSubS and VfpRoundNearest. fpscr is
            // passed to binaryOp() and written back to Fpscr afterwards.
2178        FPSCR fpscr = Fpscr;
2179        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2180                           true, true, VfpRoundNearest);
2181        Fpscr = fpscr;
2182    '''
2183    threeEqualRegInstFp("vsub", "VsubDFp", ("float",), 2, vsubfpCode)
2184    threeEqualRegInstFp("vsub", "VsubQFp", ("float",), 4, vsubfpCode)
2185
2186    vmulfpCode = '''
            // Floating point multiply through binaryOp() with fpMulS and
            // VfpRoundNearest. fpscr is passed to binaryOp() and written
            // back to Fpscr afterwards.
2187        FPSCR fpscr = Fpscr;
2188        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2189                           true, true, VfpRoundNearest);
2190        Fpscr = fpscr;
2191    '''
2192    threeEqualRegInstFp("vmul", "NVmulDFp", ("float",), 2, vmulfpCode)
2193    threeEqualRegInstFp("vmul", "NVmulQFp", ("float",), 4, vmulfpCode)
2194
2195    vmlafpCode = '''
            // Floating point multiply-accumulate done as two separate
            // binaryOp() steps: the product goes into mid, then mid is
            // added to the existing destReg. Both steps share fpscr.
2196        FPSCR fpscr = Fpscr;
2197        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2198                             true, true, VfpRoundNearest);
2199        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2200                           true, true, VfpRoundNearest);
2201        Fpscr = fpscr;
2202    '''
2203    threeEqualRegInstFp("vmla", "NVmlaDFp", ("float",), 2, vmlafpCode, True)
2204    threeEqualRegInstFp("vmla", "NVmlaQFp", ("float",), 4, vmlafpCode, True)
2205
2206    vmlsfpCode = '''
            // Floating point multiply-subtract done as two separate
            // binaryOp() steps: the product goes into mid, then mid is
            // subtracted from the existing destReg (destReg is the first
            // operand of the fpSubS step). Both steps share fpscr.
2207        FPSCR fpscr = Fpscr;
2208        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2209                             true, true, VfpRoundNearest);
2210        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2211                           true, true, VfpRoundNearest);
2212        Fpscr = fpscr;
2213    '''
2214    threeEqualRegInstFp("vmls", "NVmlsDFp", ("float",), 2, vmlsfpCode, True)
2215    threeEqualRegInstFp("vmls", "NVmlsQFp", ("float",), 4, vmlsfpCode, True)
2216
2217    vcgtfpCode = '''
            // Floating point compare greater-than. The comparison itself is
            // done by vcgtFunc (defined outside this view) through
            // binaryOp(), which hands back a float code in res.
2218        FPSCR fpscr = (FPSCR)Fpscr;
2219        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2220                             true, true, VfpRoundNearest);
            // A res of 0 selects the all-ones result (-1); any other value
            // selects 0. A res of exactly 2.0 additionally sets fpscr.ioc.
2221        destReg = (res == 0) ? -1 : 0;
2222        if (res == 2.0)
2223            fpscr.ioc = 1;
2224        Fpscr = fpscr;
2225    '''
2226    threeEqualRegInstFp("vcgt", "VcgtDFp", ("float",),
2227            2, vcgtfpCode, toInt = True)
2228    threeEqualRegInstFp("vcgt", "VcgtQFp", ("float",),
2229            4, vcgtfpCode, toInt = True)
2230
2231    vcgefpCode = '''
            // Floating point compare greater-than-or-equal. The comparison
            // itself is done by vcgeFunc (defined outside this view)
            // through binaryOp(), which hands back a float code in res.
2232        FPSCR fpscr = (FPSCR)Fpscr;
2233        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2234                             true, true, VfpRoundNearest);
            // A res of 0 selects the all-ones result (-1); any other value
            // selects 0. A res of exactly 2.0 additionally sets fpscr.ioc.
2235        destReg = (res == 0) ? -1 : 0;
2236        if (res == 2.0)
2237            fpscr.ioc = 1;
2238        Fpscr = fpscr;
2239    '''
2240    threeEqualRegInstFp("vcge", "VcgeDFp", ("float",),
2241            2, vcgefpCode, toInt = True)
2242    threeEqualRegInstFp("vcge", "VcgeQFp", ("float",),
2243            4, vcgefpCode, toInt = True)
2244
2245    vacgtfpCode = '''
            // Floating point compare for the vacgt mnemonic. The comparison
            // itself is done by vacgtFunc (defined outside this view)
            // through binaryOp(), which hands back a float code in res.
2246        FPSCR fpscr = (FPSCR)Fpscr;
2247        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2248                             true, true, VfpRoundNearest);
            // A res of 0 selects the all-ones result (-1); any other value
            // selects 0. A res of exactly 2.0 additionally sets fpscr.ioc.
2249        destReg = (res == 0) ? -1 : 0;
2250        if (res == 2.0)
2251            fpscr.ioc = 1;
2252        Fpscr = fpscr;
2253    '''
2254    threeEqualRegInstFp("vacgt", "VacgtDFp", ("float",),
2255            2, vacgtfpCode, toInt = True)
2256    threeEqualRegInstFp("vacgt", "VacgtQFp", ("float",),
2257            4, vacgtfpCode, toInt = True)
2258
2259    vacgefpCode = '''
            // Floating point compare for the vacge mnemonic. The comparison
            // itself is done by vacgeFunc (defined outside this view)
            // through binaryOp(), which hands back a float code in res.
2260        FPSCR fpscr = (FPSCR)Fpscr;
2261        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2262                             true, true, VfpRoundNearest);
            // A res of 0 selects the all-ones result (-1); any other value
            // selects 0. A res of exactly 2.0 additionally sets fpscr.ioc.
2263        destReg = (res == 0) ? -1 : 0;
2264        if (res == 2.0)
2265            fpscr.ioc = 1;
2266        Fpscr = fpscr;
2267    '''
2268    threeEqualRegInstFp("vacge", "VacgeDFp", ("float",),
2269            2, vacgefpCode, toInt = True)
2270    threeEqualRegInstFp("vacge", "VacgeQFp", ("float",),
2271            4, vacgefpCode, toInt = True)
2272
2273    vceqfpCode = '''
            // Floating point compare equal. The comparison itself is done
            // by vceqFunc (defined outside this view) through binaryOp(),
            // which hands back a float code in res.
2274        FPSCR fpscr = (FPSCR)Fpscr;
2275        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2276                             true, true, VfpRoundNearest);
            // A res of 0 selects the all-ones result (-1); any other value
            // selects 0. A res of exactly 2.0 additionally sets fpscr.ioc.
2277        destReg = (res == 0) ? -1 : 0;
2278        if (res == 2.0)
2279            fpscr.ioc = 1;
2280        Fpscr = fpscr;
2281    '''
2282    threeEqualRegInstFp("vceq", "VceqDFp", ("float",),
2283            2, vceqfpCode, toInt = True)
2284    threeEqualRegInstFp("vceq", "VceqQFp", ("float",),
2285            4, vceqfpCode, toInt = True)
2286
2287    vrecpsCode = '''
            // vrecps: the whole operation is delegated to fpRecpsS (defined
            // outside this view) through binaryOp() with VfpRoundNearest.
            // fpscr is passed in and written back to Fpscr afterwards.
2288        FPSCR fpscr = Fpscr;
2289        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2290                           true, true, VfpRoundNearest);
2291        Fpscr = fpscr;
2292    '''
2293    threeEqualRegInstFp("vrecps", "VrecpsDFp", ("float",), 2, vrecpsCode)
2294    threeEqualRegInstFp("vrecps", "VrecpsQFp", ("float",), 4, vrecpsCode)
2295
2296    vrsqrtsCode = '''
            // vrsqrts: the whole operation is delegated to fpRSqrtsS
            // (defined outside this view) through binaryOp() with
            // VfpRoundNearest. fpscr is passed in and written back to Fpscr
            // afterwards.
2297        FPSCR fpscr = Fpscr;
2298        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2299                           true, true, VfpRoundNearest);
2300        Fpscr = fpscr;
2301    '''
2302    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", ("float",), 2, vrsqrtsCode)
2303    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", ("float",), 4, vrsqrtsCode)
2304
2305    vabdfpCode = '''
            // Floating point absolute difference: the difference is formed
            // through binaryOp() with fpSubS, then fabs() is applied to it.
2306        FPSCR fpscr = Fpscr;
2307        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2308                             true, true, VfpRoundNearest);
2309        destReg = fabs(mid);
2310        Fpscr = fpscr;
2311    '''
2312    threeEqualRegInstFp("vabd", "VabdDFp", ("float",), 2, vabdfpCode)
2313    threeEqualRegInstFp("vabd", "VabdQFp", ("float",), 4, vabdfpCode)
2314
2315    twoEqualRegInst("vmla", "VmlasD", unsignedTypes, 2, vmlaCode, True)  # second registration of the vmla snippet
2316    twoEqualRegInst("vmla", "VmlasQ", unsignedTypes, 4, vmlaCode, True)
2317    twoEqualRegInstFp("vmla", "VmlasDFp", ("float",), 2, vmlafpCode, True)
2318    twoEqualRegInstFp("vmla", "VmlasQFp", ("float",), 4, vmlafpCode, True)
2319    twoRegLongInst("vmlal", "Vmlals", smallTypes, vmlalCode, True)
2320
        # This whole group re-registers snippets defined earlier in the file
        # through the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst
        # helpers, under class names that carry an extra "s". Presumably these
        # are the by-scalar instruction forms; confirm against the helper
        # definitions, which are outside this view.
        # NOTE(review): vmla above is registered over unsignedTypes while
        # vmls and vmul below use allTypes; confirm this matches the decoder.
2321    twoEqualRegInst("vmls", "VmlssD", allTypes, 2, vmlsCode, True)
2322    twoEqualRegInst("vmls", "VmlssQ", allTypes, 4, vmlsCode, True)
2323    twoEqualRegInstFp("vmls", "VmlssDFp", ("float",), 2, vmlsfpCode, True)
2324    twoEqualRegInstFp("vmls", "VmlssQFp", ("float",), 4, vmlsfpCode, True)
2325    twoRegLongInst("vmlsl", "Vmlsls", smallTypes, vmlslCode, True)
2326
2327    twoEqualRegInst("vmul", "VmulsD", allTypes, 2, vmulCode)
2328    twoEqualRegInst("vmul", "VmulsQ", allTypes, 4, vmulCode)
2329    twoEqualRegInstFp("vmul", "VmulsDFp", ("float",), 2, vmulfpCode)
2330    twoEqualRegInstFp("vmul", "VmulsQFp", ("float",), 4, vmulfpCode)
2331    twoRegLongInst("vmull", "Vmulls", smallTypes, vmullCode)
2332
        # Saturating doubling multiplies, again reusing the earlier snippets.
2333    twoRegLongInst("vqdmull", "Vqdmulls", smallTypes, vqdmullCode)
2334    twoRegLongInst("vqdmlal", "Vqdmlals", smallTypes, vqdmlalCode, True)
2335    twoRegLongInst("vqdmlsl", "Vqdmlsls", smallTypes, vqdmlslCode, True)
2336    twoEqualRegInst("vqdmulh", "VqdmulhsD", smallSignedTypes, 2, vqdmulhCode)
2337    twoEqualRegInst("vqdmulh", "VqdmulhsQ", smallSignedTypes, 4, vqdmulhCode)
2338    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2339            smallSignedTypes, 2, vqrdmulhCode)
2340    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2341            smallSignedTypes, 4, vqrdmulhCode)
2342
2343    vshrCode = '''
2344        if (imm >= sizeof(srcElem1) * 8) {
2345            if (ltz(srcElem1))
2346                destElem = -1;
2347            else
2348                destElem = 0;
2349        } else {
2350            destElem = srcElem1 >> imm;
2351        }
2352    '''
2353    twoRegShiftInst("vshr", "NVshrD", allTypes, 2, vshrCode)
2354    twoRegShiftInst("vshr", "NVshrQ", allTypes, 4, vshrCode)
2355
2356    vsraCode = '''
2357        Element mid;;
2358        if (imm >= sizeof(srcElem1) * 8) {
2359            mid = ltz(srcElem1) ? -1 : 0;
2360        } else {
2361            mid = srcElem1 >> imm;
2362            if (ltz(srcElem1) && !ltz(mid)) {
2363                mid |= -(mid & ((Element)1 <<
2364                            (sizeof(Element) * 8 - 1 - imm)));
2365            }
2366        }
2367        destElem += mid;
2368    '''
2369    twoRegShiftInst("vsra", "NVsraD", allTypes, 2, vsraCode, True)
2370    twoRegShiftInst("vsra", "NVsraQ", allTypes, 4, vsraCode, True)
2371
2372    vrshrCode = '''
2373        if (imm > sizeof(srcElem1) * 8) {
2374            destElem = 0;
2375        } else if (imm) {
2376            Element rBit = bits(srcElem1, imm - 1);
2377            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2378        } else {
2379            destElem = srcElem1;
2380        }
2381    '''
2382    twoRegShiftInst("vrshr", "NVrshrD", allTypes, 2, vrshrCode)
2383    twoRegShiftInst("vrshr", "NVrshrQ", allTypes, 4, vrshrCode)
2384
2385    vrsraCode = '''
2386        if (imm > sizeof(srcElem1) * 8) {
2387            destElem += 0;
2388        } else if (imm) {
2389            Element rBit = bits(srcElem1, imm - 1);
2390            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2391        } else {
2392            destElem += srcElem1;
2393        }
2394    '''
2395    twoRegShiftInst("vrsra", "NVrsraD", allTypes, 2, vrsraCode, True)
2396    twoRegShiftInst("vrsra", "NVrsraQ", allTypes, 4, vrsraCode, True)
2397
2398    vsriCode = '''
2399        if (imm >= sizeof(Element) * 8)
2400            destElem = destElem;
2401        else
2402            destElem = (srcElem1 >> imm) |
2403                (destElem & ~mask(sizeof(Element) * 8 - imm));
2404    '''
2405    twoRegShiftInst("vsri", "NVsriD", unsignedTypes, 2, vsriCode, True)
2406    twoRegShiftInst("vsri", "NVsriQ", unsignedTypes, 4, vsriCode, True)
2407
2408    vshlCode = '''
2409        if (imm >= sizeof(Element) * 8)
2410            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2411        else
2412            destElem = srcElem1 << imm;
2413    '''
2414    twoRegShiftInst("vshl", "NVshlD", unsignedTypes, 2, vshlCode)
2415    twoRegShiftInst("vshl", "NVshlQ", unsignedTypes, 4, vshlCode)
2416
2417    vsliCode = '''
2418        if (imm >= sizeof(Element) * 8)
2419            destElem = destElem;
2420        else
2421            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2422    '''
2423    twoRegShiftInst("vsli", "NVsliD", unsignedTypes, 2, vsliCode, True)
2424    twoRegShiftInst("vsli", "NVsliQ", unsignedTypes, 4, vsliCode, True)
2425
2426    vqshlCode = '''
2427        FPSCR fpscr = (FPSCR)Fpscr;
2428        if (imm >= sizeof(Element) * 8) {
2429            if (srcElem1 != 0) {
2430                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2431                if (srcElem1 > 0)
2432                    destElem = ~destElem;
2433                fpscr.qc = 1;
2434            } else {
2435                destElem = 0;
2436            }
2437        } else if (imm) {
2438            destElem = (srcElem1 << imm);
2439            uint64_t topBits = bits((uint64_t)srcElem1,
2440                                    sizeof(Element) * 8 - 1,
2441                                    sizeof(Element) * 8 - 1 - imm);
2442            if (topBits != 0 && topBits != mask(imm + 1)) {
2443                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2444                if (srcElem1 > 0)
2445                    destElem = ~destElem;
2446                fpscr.qc = 1;
2447            }
2448        } else {
2449            destElem = srcElem1;
2450        }
2451        Fpscr = fpscr;
2452    '''
2453    twoRegShiftInst("vqshl", "NVqshlD", signedTypes, 2, vqshlCode)
2454    twoRegShiftInst("vqshl", "NVqshlQ", signedTypes, 4, vqshlCode)
2455
2456    vqshluCode = '''
2457        FPSCR fpscr = (FPSCR)Fpscr;
2458        if (imm >= sizeof(Element) * 8) {
2459            if (srcElem1 != 0) {
2460                destElem = mask(sizeof(Element) * 8);
2461                fpscr.qc = 1;
2462            } else {
2463                destElem = 0;
2464            }
2465        } else if (imm) {
2466            destElem = (srcElem1 << imm);
2467            uint64_t topBits = bits((uint64_t)srcElem1,
2468                                    sizeof(Element) * 8 - 1,
2469                                    sizeof(Element) * 8 - imm);
2470            if (topBits != 0) {
2471                destElem = mask(sizeof(Element) * 8);
2472                fpscr.qc = 1;
2473            }
2474        } else {
2475            destElem = srcElem1;
2476        }
2477        Fpscr = fpscr;
2478    '''
2479    twoRegShiftInst("vqshlu", "NVqshluD", unsignedTypes, 2, vqshluCode)
2480    twoRegShiftInst("vqshlu", "NVqshluQ", unsignedTypes, 4, vqshluCode)
2481
2482    vqshlusCode = '''
2483        FPSCR fpscr = (FPSCR)Fpscr;
2484        if (imm >= sizeof(Element) * 8) {
2485            if (srcElem1 < 0) {
2486                destElem = 0;
2487                fpscr.qc = 1;
2488            } else if (srcElem1 > 0) {
2489                destElem = mask(sizeof(Element) * 8);
2490                fpscr.qc = 1;
2491            } else {
2492                destElem = 0;
2493            }
2494        } else if (imm) {
2495            destElem = (srcElem1 << imm);
2496            uint64_t topBits = bits((uint64_t)srcElem1,
2497                                    sizeof(Element) * 8 - 1,
2498                                    sizeof(Element) * 8 - imm);
2499            if (srcElem1 < 0) {
2500                destElem = 0;
2501                fpscr.qc = 1;
2502            } else if (topBits != 0) {
2503                destElem = mask(sizeof(Element) * 8);
2504                fpscr.qc = 1;
2505            }
2506        } else {
2507            if (srcElem1 < 0) {
2508                fpscr.qc = 1;
2509                destElem = 0;
2510            } else {
2511                destElem = srcElem1;
2512            }
2513        }
2514        Fpscr = fpscr;
2515    '''
2516    twoRegShiftInst("vqshlus", "NVqshlusD", signedTypes, 2, vqshlusCode)
2517    twoRegShiftInst("vqshlus", "NVqshlusQ", signedTypes, 4, vqshlusCode)
2518
2519    vshrnCode = '''
2520        if (imm >= sizeof(srcElem1) * 8) {
2521            destElem = 0;
2522        } else {
2523            destElem = srcElem1 >> imm;
2524        }
2525    '''
2526    twoRegNarrowShiftInst("vshrn", "NVshrn", smallUnsignedTypes, vshrnCode)
2527
2528    vrshrnCode = '''
2529        if (imm > sizeof(srcElem1) * 8) {
2530            destElem = 0;
2531        } else if (imm) {
2532            Element rBit = bits(srcElem1, imm - 1);
2533            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2534        } else {
2535            destElem = srcElem1;
2536        }
2537    '''
2538    twoRegNarrowShiftInst("vrshrn", "NVrshrn", smallUnsignedTypes, vrshrnCode)
2539
2540    vqshrnCode = '''
2541        FPSCR fpscr = (FPSCR)Fpscr;
2542        if (imm > sizeof(srcElem1) * 8) {
2543            if (srcElem1 != 0 && srcElem1 != -1)
2544                fpscr.qc = 1;
2545            destElem = 0;
2546        } else if (imm) {
2547            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2548            mid |= -(mid & ((BigElement)1 <<
2549                        (sizeof(BigElement) * 8 - 1 - imm)));
2550            if (mid != (Element)mid) {
2551                destElem = mask(sizeof(Element) * 8 - 1);
2552                if (srcElem1 < 0)
2553                    destElem = ~destElem;
2554                fpscr.qc = 1;
2555            } else {
2556                destElem = mid;
2557            }
2558        } else {
2559            destElem = srcElem1;
2560        }
2561        Fpscr = fpscr;
2562    '''
2563    twoRegNarrowShiftInst("vqshrn", "NVqshrn", smallSignedTypes, vqshrnCode)
2564
2565    vqshrunCode = '''
2566        FPSCR fpscr = (FPSCR)Fpscr;
2567        if (imm > sizeof(srcElem1) * 8) {
2568            if (srcElem1 != 0)
2569                fpscr.qc = 1;
2570            destElem = 0;
2571        } else if (imm) {
2572            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2573            if (mid != (Element)mid) {
2574                destElem = mask(sizeof(Element) * 8);
2575                fpscr.qc = 1;
2576            } else {
2577                destElem = mid;
2578            }
2579        } else {
2580            destElem = srcElem1;
2581        }
2582        Fpscr = fpscr;
2583    '''
2584    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2585            smallUnsignedTypes, vqshrunCode)
2586
2587    vqshrunsCode = '''
2588        FPSCR fpscr = (FPSCR)Fpscr;
2589        if (imm > sizeof(srcElem1) * 8) {
2590            if (srcElem1 != 0)
2591                fpscr.qc = 1;
2592            destElem = 0;
2593        } else if (imm) {
2594            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2595            if (bits(mid, sizeof(BigElement) * 8 - 1,
2596                          sizeof(Element) * 8) != 0) {
2597                if (srcElem1 < 0) {
2598                    destElem = 0;
2599                } else {
2600                    destElem = mask(sizeof(Element) * 8);
2601                }
2602                fpscr.qc = 1;
2603            } else {
2604                destElem = mid;
2605            }
2606        } else {
2607            destElem = srcElem1;
2608        }
2609        Fpscr = fpscr;
2610    '''
2611    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2612            smallSignedTypes, vqshrunsCode)
2613
2614    vqrshrnCode = '''
2615        FPSCR fpscr = (FPSCR)Fpscr;
2616        if (imm > sizeof(srcElem1) * 8) {
2617            if (srcElem1 != 0 && srcElem1 != -1)
2618                fpscr.qc = 1;
2619            destElem = 0;
2620        } else if (imm) {
2621            BigElement mid = (srcElem1 >> (imm - 1));
2622            uint64_t rBit = mid & 0x1;
2623            mid >>= 1;
2624            mid |= -(mid & ((BigElement)1 <<
2625                        (sizeof(BigElement) * 8 - 1 - imm)));
2626            mid += rBit;
2627            if (mid != (Element)mid) {
2628                destElem = mask(sizeof(Element) * 8 - 1);
2629                if (srcElem1 < 0)
2630                    destElem = ~destElem;
2631                fpscr.qc = 1;
2632            } else {
2633                destElem = mid;
2634            }
2635        } else {
2636            if (srcElem1 != (Element)srcElem1) {
2637                destElem = mask(sizeof(Element) * 8 - 1);
2638                if (srcElem1 < 0)
2639                    destElem = ~destElem;
2640                fpscr.qc = 1;
2641            } else {
2642                destElem = srcElem1;
2643            }
2644        }
2645        Fpscr = fpscr;
2646    '''
2647    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2648            smallSignedTypes, vqrshrnCode)
2649
2650    vqrshrunCode = '''
2651        FPSCR fpscr = (FPSCR)Fpscr;
2652        if (imm > sizeof(srcElem1) * 8) {
2653            if (srcElem1 != 0)
2654                fpscr.qc = 1;
2655            destElem = 0;
2656        } else if (imm) {
2657            BigElement mid = (srcElem1 >> (imm - 1));
2658            uint64_t rBit = mid & 0x1;
2659            mid >>= 1;
2660            mid += rBit;
2661            if (mid != (Element)mid) {
2662                destElem = mask(sizeof(Element) * 8);
2663                fpscr.qc = 1;
2664            } else {
2665                destElem = mid;
2666            }
2667        } else {
2668            if (srcElem1 != (Element)srcElem1) {
2669                destElem = mask(sizeof(Element) * 8 - 1);
2670                fpscr.qc = 1;
2671            } else {
2672                destElem = srcElem1;
2673            }
2674        }
2675        Fpscr = fpscr;
2676    '''
2677    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2678            smallUnsignedTypes, vqrshrunCode)
2679
2680    vqrshrunsCode = '''
2681        FPSCR fpscr = (FPSCR)Fpscr;
2682        if (imm > sizeof(srcElem1) * 8) {
2683            if (srcElem1 != 0)
2684                fpscr.qc = 1;
2685            destElem = 0;
2686        } else if (imm) {
2687            BigElement mid = (srcElem1 >> (imm - 1));
2688            uint64_t rBit = mid & 0x1;
2689            mid >>= 1;
2690            mid |= -(mid & ((BigElement)1 <<
2691                            (sizeof(BigElement) * 8 - 1 - imm)));
2692            mid += rBit;
2693            if (bits(mid, sizeof(BigElement) * 8 - 1,
2694                          sizeof(Element) * 8) != 0) {
2695                if (srcElem1 < 0) {
2696                    destElem = 0;
2697                } else {
2698                    destElem = mask(sizeof(Element) * 8);
2699                }
2700                fpscr.qc = 1;
2701            } else {
2702                destElem = mid;
2703            }
2704        } else {
2705            if (srcElem1 < 0) {
2706                fpscr.qc = 1;
2707                destElem = 0;
2708            } else {
2709                destElem = srcElem1;
2710            }
2711        }
2712        Fpscr = fpscr;
2713    '''
2714    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2715            smallSignedTypes, vqrshrunsCode)
2716
2717    vshllCode = '''
2718        if (imm >= sizeof(destElem) * 8) {
2719            destElem = 0;
2720        } else {
2721            destElem = (BigElement)srcElem1 << imm;
2722        }
2723    '''
2724    twoRegLongShiftInst("vshll", "NVshll", smallTypes, vshllCode)
2725
2726    vmovlCode = '''
2727        destElem = srcElem1;
2728    '''
2729    twoRegLongShiftInst("vmovl", "NVmovl", smallTypes, vmovlCode)
2730
2731    vcvt2ufxCode = '''
2732        FPSCR fpscr = Fpscr;
2733        if (flushToZero(srcElem1))
2734            fpscr.idc = 1;
2735        VfpSavedState state = prepFpState(VfpRoundNearest);
2736        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2737        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2738        __asm__ __volatile__("" :: "m" (destReg));
2739        finishVfp(fpscr, state, true);
2740        Fpscr = fpscr;
2741    '''
2742    twoRegShiftInst("vcvt", "NVcvt2ufxD", ("float",),
2743            2, vcvt2ufxCode, toInt = True)
2744    twoRegShiftInst("vcvt", "NVcvt2ufxQ", ("float",),
2745            4, vcvt2ufxCode, toInt = True)
2746
2747    vcvt2sfxCode = '''
2748        FPSCR fpscr = Fpscr;
2749        if (flushToZero(srcElem1))
2750            fpscr.idc = 1;
2751        VfpSavedState state = prepFpState(VfpRoundNearest);
2752        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2753        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2754        __asm__ __volatile__("" :: "m" (destReg));
2755        finishVfp(fpscr, state, true);
2756        Fpscr = fpscr;
2757    '''
2758    twoRegShiftInst("vcvt", "NVcvt2sfxD", ("float",),
2759            2, vcvt2sfxCode, toInt = True)
2760    twoRegShiftInst("vcvt", "NVcvt2sfxQ", ("float",),
2761            4, vcvt2sfxCode, toInt = True)
2762
2763    vcvtu2fpCode = '''
2764        FPSCR fpscr = Fpscr;
2765        VfpSavedState state = prepFpState(VfpRoundNearest);
2766        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2767        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2768        __asm__ __volatile__("" :: "m" (destElem));
2769        finishVfp(fpscr, state, true);
2770        Fpscr = fpscr;
2771    '''
2772    twoRegShiftInst("vcvt", "NVcvtu2fpD", ("float",),
2773            2, vcvtu2fpCode, fromInt = True)
2774    twoRegShiftInst("vcvt", "NVcvtu2fpQ", ("float",),
2775            4, vcvtu2fpCode, fromInt = True)
2776
2777    vcvts2fpCode = '''
2778        FPSCR fpscr = Fpscr;
2779        VfpSavedState state = prepFpState(VfpRoundNearest);
2780        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2781        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2782        __asm__ __volatile__("" :: "m" (destElem));
2783        finishVfp(fpscr, state, true);
2784        Fpscr = fpscr;
2785    '''
2786    twoRegShiftInst("vcvt", "NVcvts2fpD", ("float",),
2787            2, vcvts2fpCode, fromInt = True)
2788    twoRegShiftInst("vcvt", "NVcvts2fpQ", ("float",),
2789            4, vcvts2fpCode, fromInt = True)
2790
2791    vcvts2hCode = '''
2792        FPSCR fpscr = Fpscr;
2793        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2794        if (flushToZero(srcFp1))
2795            fpscr.idc = 1;
2796        VfpSavedState state = prepFpState(VfpRoundNearest);
2797        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2798                                : "m" (srcFp1), "m" (destElem));
2799        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2800                              fpscr.ahp, srcFp1);
2801        __asm__ __volatile__("" :: "m" (destElem));
2802        finishVfp(fpscr, state, true);
2803        Fpscr = fpscr;
2804    '''
2805    twoRegNarrowMiscInst("vcvt", "NVcvts2h", ("uint16_t",), vcvts2hCode)
2806
2807    vcvth2sCode = '''
2808        FPSCR fpscr = Fpscr;
2809        VfpSavedState state = prepFpState(VfpRoundNearest);
2810        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2811                                : "m" (srcElem1), "m" (destElem));
2812        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2813        __asm__ __volatile__("" :: "m" (destElem));
2814        finishVfp(fpscr, state, true);
2815        Fpscr = fpscr;
2816    '''
2817    twoRegLongMiscInst("vcvt", "NVcvth2s", ("uint16_t",), vcvth2sCode)
2818
2819    vrsqrteCode = '''
2820        destElem = unsignedRSqrtEstimate(srcElem1);
2821    '''
2822    twoRegMiscInst("vrsqrte", "NVrsqrteD", ("uint32_t",), 2, vrsqrteCode)
2823    twoRegMiscInst("vrsqrte", "NVrsqrteQ", ("uint32_t",), 4, vrsqrteCode)
2824
2825    vrsqrtefpCode = '''
2826        FPSCR fpscr = Fpscr;
2827        if (flushToZero(srcReg1))
2828            fpscr.idc = 1;
2829        destReg = fprSqrtEstimate(fpscr, srcReg1);
2830        Fpscr = fpscr;
2831    '''
2832    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", ("float",), 2, vrsqrtefpCode)
2833    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", ("float",), 4, vrsqrtefpCode)
2834
2835    vrecpeCode = '''
2836        destElem = unsignedRecipEstimate(srcElem1);
2837    '''
2838    twoRegMiscInst("vrecpe", "NVrecpeD", ("uint32_t",), 2, vrecpeCode)
2839    twoRegMiscInst("vrecpe", "NVrecpeQ", ("uint32_t",), 4, vrecpeCode)
2840
2841    vrecpefpCode = '''
2842        FPSCR fpscr = Fpscr;
2843        if (flushToZero(srcReg1))
2844            fpscr.idc = 1;
2845        destReg = fpRecipEstimate(fpscr, srcReg1);
2846        Fpscr = fpscr;
2847    '''
2848    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", ("float",), 2, vrecpefpCode)
2849    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", ("float",), 4, vrecpefpCode)
2850
2851    vrev16Code = '''
2852        destElem = srcElem1;
2853        unsigned groupSize = ((1 << 1) / sizeof(Element));
2854        unsigned reverseMask = (groupSize - 1);
2855        j = i ^ reverseMask;
2856    '''
2857    twoRegMiscInst("vrev16", "NVrev16D", ("uint8_t",), 2, vrev16Code)
2858    twoRegMiscInst("vrev16", "NVrev16Q", ("uint8_t",), 4, vrev16Code)
2859    vrev32Code = '''
2860        destElem = srcElem1;
2861        unsigned groupSize = ((1 << 2) / sizeof(Element));
2862        unsigned reverseMask = (groupSize - 1);
2863        j = i ^ reverseMask;
2864    '''
2865    twoRegMiscInst("vrev32", "NVrev32D",
2866            ("uint8_t", "uint16_t"), 2, vrev32Code)
2867    twoRegMiscInst("vrev32", "NVrev32Q",
2868            ("uint8_t", "uint16_t"), 4, vrev32Code)
2869    vrev64Code = '''
2870        destElem = srcElem1;
2871        unsigned groupSize = ((1 << 3) / sizeof(Element));
2872        unsigned reverseMask = (groupSize - 1);
2873        j = i ^ reverseMask;
2874    '''
2875    twoRegMiscInst("vrev64", "NVrev64D", smallUnsignedTypes, 2, vrev64Code)
2876    twoRegMiscInst("vrev64", "NVrev64Q", smallUnsignedTypes, 4, vrev64Code)
2877
2878    vpaddlCode = '''
2879        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2880    '''
2881    twoRegCondenseInst("vpaddl", "NVpaddlD", smallTypes, 2, vpaddlCode)
2882    twoRegCondenseInst("vpaddl", "NVpaddlQ", smallTypes, 4, vpaddlCode)
2883
2884    vpadalCode = '''
2885        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2886    '''
2887    twoRegCondenseInst("vpadal", "NVpadalD", smallTypes, 2, vpadalCode, True)
2888    twoRegCondenseInst("vpadal", "NVpadalQ", smallTypes, 4, vpadalCode, True)
2889
2890    vclsCode = '''
2891        unsigned count = 0;
2892        if (srcElem1 < 0) {
2893            srcElem1 <<= 1;
2894            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2895                count++;
2896                srcElem1 <<= 1;
2897            }
2898        } else {
2899            srcElem1 <<= 1;
2900            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2901                count++;
2902                srcElem1 <<= 1;
2903            }
2904        }
2905        destElem = count;
2906    '''
2907    twoRegMiscInst("vcls", "NVclsD", signedTypes, 2, vclsCode)
2908    twoRegMiscInst("vcls", "NVclsQ", signedTypes, 4, vclsCode)
2909
2910    vclzCode = '''
2911        unsigned count = 0;
2912        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2913            count++;
2914            srcElem1 <<= 1;
2915        }
2916        destElem = count;
2917    '''
2918    twoRegMiscInst("vclz", "NVclzD", signedTypes, 2, vclzCode)
2919    twoRegMiscInst("vclz", "NVclzQ", signedTypes, 4, vclzCode)
2920
2921    vcntCode = '''
2922        unsigned count = 0;
2923        while (srcElem1 && count < sizeof(Element) * 8) {
2924            count += srcElem1 & 0x1;
2925            srcElem1 >>= 1;
2926        }
2927        destElem = count;
2928    '''
2929    twoRegMiscInst("vcnt", "NVcntD", unsignedTypes, 2, vcntCode)
2930    twoRegMiscInst("vcnt", "NVcntQ", unsignedTypes, 4, vcntCode)
2931
2932    vmvnCode = '''
2933        destElem = ~srcElem1;
2934    '''
2935    twoRegMiscInst("vmvn", "NVmvnD", ("uint64_t",), 2, vmvnCode)
2936    twoRegMiscInst("vmvn", "NVmvnQ", ("uint64_t",), 4, vmvnCode)
2937
2938    vqabsCode = '''
2939        FPSCR fpscr = (FPSCR)Fpscr;
2940        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2941            fpscr.qc = 1;
2942            destElem = ~srcElem1;
2943        } else if (srcElem1 < 0) {
2944            destElem = -srcElem1;
2945        } else {
2946            destElem = srcElem1;
2947        }
2948        Fpscr = fpscr;
2949    '''
2950    twoRegMiscInst("vqabs", "NVqabsD", signedTypes, 2, vqabsCode)
2951    twoRegMiscInst("vqabs", "NVqabsQ", signedTypes, 4, vqabsCode)
2952
2953    vqnegCode = '''
2954        FPSCR fpscr = (FPSCR)Fpscr;
2955        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2956            fpscr.qc = 1;
2957            destElem = ~srcElem1;
2958        } else {
2959            destElem = -srcElem1;
2960        }
2961        Fpscr = fpscr;
2962    '''
2963    twoRegMiscInst("vqneg", "NVqnegD", signedTypes, 2, vqnegCode)
2964    twoRegMiscInst("vqneg", "NVqnegQ", signedTypes, 4, vqnegCode)
2965
2966    vabsCode = '''
2967        if (srcElem1 < 0) {
2968            destElem = -srcElem1;
2969        } else {
2970            destElem = srcElem1;
2971        }
2972    '''
2973    twoRegMiscInst("vabs", "NVabsD", signedTypes, 2, vabsCode)
2974    twoRegMiscInst("vabs", "NVabsQ", signedTypes, 4, vabsCode)
2975    vabsfpCode = '''
2976        union
2977        {
2978            uint32_t i;
2979            float f;
2980        } cStruct;
2981        cStruct.f = srcReg1;
2982        cStruct.i &= mask(sizeof(Element) * 8 - 1);
2983        destReg = cStruct.f;
2984    '''
2985    twoRegMiscInstFp("vabs", "NVabsDFp", ("float",), 2, vabsfpCode)
2986    twoRegMiscInstFp("vabs", "NVabsQFp", ("float",), 4, vabsfpCode)
2987
2988    vnegCode = '''
2989        destElem = -srcElem1;
2990    '''
2991    twoRegMiscInst("vneg", "NVnegD", signedTypes, 2, vnegCode)
2992    twoRegMiscInst("vneg", "NVnegQ", signedTypes, 4, vnegCode)
2993    vnegfpCode = '''
2994        destReg = -srcReg1;
2995    '''
2996    twoRegMiscInstFp("vneg", "NVnegDFp", ("float",), 2, vnegfpCode)
2997    twoRegMiscInstFp("vneg", "NVnegQFp", ("float",), 4, vnegfpCode)
2998
2999    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3000    twoRegMiscInst("vcgt", "NVcgtD", signedTypes, 2, vcgtCode)
3001    twoRegMiscInst("vcgt", "NVcgtQ", signedTypes, 4, vcgtCode)
3002    vcgtfpCode = '''
3003        FPSCR fpscr = (FPSCR)Fpscr;
3004        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3005                             true, true, VfpRoundNearest);
3006        destReg = (res == 0) ? -1 : 0;
3007        if (res == 2.0)
3008            fpscr.ioc = 1;
3009        Fpscr = fpscr;
3010    '''
3011    twoRegMiscInstFp("vcgt", "NVcgtDFp", ("float",),
3012            2, vcgtfpCode, toInt = True)
3013    twoRegMiscInstFp("vcgt", "NVcgtQFp", ("float",),
3014            4, vcgtfpCode, toInt = True)
3015
3016    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3017    twoRegMiscInst("vcge", "NVcgeD", signedTypes, 2, vcgeCode)
3018    twoRegMiscInst("vcge", "NVcgeQ", signedTypes, 4, vcgeCode)
3019    vcgefpCode = '''
3020        FPSCR fpscr = (FPSCR)Fpscr;
3021        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3022                             true, true, VfpRoundNearest);
3023        destReg = (res == 0) ? -1 : 0;
3024        if (res == 2.0)
3025            fpscr.ioc = 1;
3026        Fpscr = fpscr;
3027    '''
3028    twoRegMiscInstFp("vcge", "NVcgeDFp", ("float",),
3029            2, vcgefpCode, toInt = True)
3030    twoRegMiscInstFp("vcge", "NVcgeQFp", ("float",),
3031            4, vcgefpCode, toInt = True)
3032
3033    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3034    twoRegMiscInst("vceq", "NVceqD", signedTypes, 2, vceqCode)
3035    twoRegMiscInst("vceq", "NVceqQ", signedTypes, 4, vceqCode)
3036    vceqfpCode = '''
3037        FPSCR fpscr = (FPSCR)Fpscr;
3038        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3039                             true, true, VfpRoundNearest);
3040        destReg = (res == 0) ? -1 : 0;
3041        if (res == 2.0)
3042            fpscr.ioc = 1;
3043        Fpscr = fpscr;
3044    '''
3045    twoRegMiscInstFp("vceq", "NVceqDFp", ("float",),
3046            2, vceqfpCode, toInt = True)
3047    twoRegMiscInstFp("vceq", "NVceqQFp", ("float",),
3048            4, vceqfpCode, toInt = True)
3049
3050    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3051    twoRegMiscInst("vcle", "NVcleD", signedTypes, 2, vcleCode)
3052    twoRegMiscInst("vcle", "NVcleQ", signedTypes, 4, vcleCode)
3053    vclefpCode = '''
3054        FPSCR fpscr = (FPSCR)Fpscr;
3055        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3056                             true, true, VfpRoundNearest);
3057        destReg = (res == 0) ? -1 : 0;
3058        if (res == 2.0)
3059            fpscr.ioc = 1;
3060        Fpscr = fpscr;
3061    '''
3062    twoRegMiscInstFp("vcle", "NVcleDFp", ("float",),
3063            2, vclefpCode, toInt = True)
3064    twoRegMiscInstFp("vcle", "NVcleQFp", ("float",),
3065            4, vclefpCode, toInt = True)
3066
3067    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3068    twoRegMiscInst("vclt", "NVcltD", signedTypes, 2, vcltCode)
3069    twoRegMiscInst("vclt", "NVcltQ", signedTypes, 4, vcltCode)
3070    vcltfpCode = '''
3071        FPSCR fpscr = (FPSCR)Fpscr;
3072        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3073                             true, true, VfpRoundNearest);
3074        destReg = (res == 0) ? -1 : 0;
3075        if (res == 2.0)
3076            fpscr.ioc = 1;
3077        Fpscr = fpscr;
3078    '''
3079    twoRegMiscInstFp("vclt", "NVcltDFp", ("float",),
3080            2, vcltfpCode, toInt = True)
3081    twoRegMiscInstFp("vclt", "NVcltQFp", ("float",),
3082            4, vcltfpCode, toInt = True)
3083
3084    vswpCode = '''
3085        FloatRegBits mid;
3086        for (unsigned r = 0; r < rCount; r++) {
3087            mid = srcReg1.regs[r];
3088            srcReg1.regs[r] = destReg.regs[r];
3089            destReg.regs[r] = mid;
3090        }
3091    '''
3092    twoRegMiscScramble("vswp", "NVswpD", ("uint64_t",), 2, vswpCode)
3093    twoRegMiscScramble("vswp", "NVswpQ", ("uint64_t",), 4, vswpCode)
3094
3095    vtrnCode = '''
3096        Element mid;
3097        for (unsigned i = 0; i < eCount; i += 2) {
3098            mid = srcReg1.elements[i];
3099            srcReg1.elements[i] = destReg.elements[i + 1];
3100            destReg.elements[i + 1] = mid;
3101        }
3102    '''
3103    twoRegMiscScramble("vtrn", "NVtrnD", unsignedTypes, 2, vtrnCode)
3104    twoRegMiscScramble("vtrn", "NVtrnQ", unsignedTypes, 4, vtrnCode)
3105
3106    vuzpCode = '''
3107        Element mid[eCount];
3108        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3109        for (unsigned i = 0; i < eCount / 2; i++) {
3110            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3111            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3112            destReg.elements[i] = destReg.elements[2 * i];
3113        }
3114        for (unsigned i = 0; i < eCount / 2; i++) {
3115            destReg.elements[eCount / 2 + i] = mid[2 * i];
3116        }
3117    '''
3118    twoRegMiscScramble("vuzp", "NVuzpD", unsignedTypes, 2, vuzpCode)
3119    twoRegMiscScramble("vuzp", "NVuzpQ", unsignedTypes, 4, vuzpCode)
3120
3121    vzipCode = '''
3122        Element mid[eCount];
3123        memcpy(&mid, &destReg, sizeof(destReg));
3124        for (unsigned i = 0; i < eCount / 2; i++) {
3125            destReg.elements[2 * i] = mid[i];
3126            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3127        }
3128        for (int i = 0; i < eCount / 2; i++) {
3129            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3130            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3131        }
3132    '''
3133    twoRegMiscScramble("vzip", "NVzipD", unsignedTypes, 2, vzipCode)
3134    twoRegMiscScramble("vzip", "NVzipQ", unsignedTypes, 4, vzipCode)
3135
3136    vmovnCode = 'destElem = srcElem1;'  # per-element body: plain assignment
    # "vmovn" is registered through twoRegNarrowMiscInst; any narrowing of
    # the value presumably comes from that helper giving destElem a smaller
    # type than srcElem1 (helper defined outside this section -- confirm).
3137    twoRegNarrowMiscInst("vmovn", "NVmovn", smallUnsignedTypes, vmovnCode)
3138
3139    vdupCode = 'destElem = srcElem1;'  # same one-line body as vmovnCode
    # "vdup" uses twoRegMiscScInst instead, registered twice with 2 and 4 as
    # the fourth argument (presumably the register-word count -- confirm).
3140    twoRegMiscScInst("vdup", "NVdupD", smallUnsignedTypes, 2, vdupCode)
3141    twoRegMiscScInst("vdup", "NVdupQ", smallUnsignedTypes, 4, vdupCode)
3142
3143    def vdupGprInst(name, Name, types, rCount):
3144        global header_output, exec_output
3145        eWalkCode = '''
3146        RegVect destReg;
3147        for (unsigned i = 0; i < eCount; i++) {
3148            destReg.elements[i] = htog((Element)Op1);
3149        }
3150        '''
3151        for reg in range(rCount):
3152            eWalkCode += '''
3153            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3154            ''' % { "reg" : reg }
3155        iop = InstObjParams(name, Name,
3156                            "RegRegOp",
3157                            { "code": eWalkCode,
3158                              "r_count": rCount,
3159                              "predicate_test": predicateTest }, [])
3160        header_output += NeonRegRegOpDeclare.subst(iop)
3161        exec_output += NeonEqualRegExecute.subst(iop)
3162        for type in types:
3163            substDict = { "targs" : type,
3164                          "class_name" : Name }
3165            exec_output += NeonExecDeclare.subst(substDict)
3166    vdupGprInst("vdup", "NVdupDGpr", smallUnsignedTypes, 2)
3167    vdupGprInst("vdup", "NVdupQGpr", smallUnsignedTypes, 4)
3168
3169    vmovCode = 'destElem = imm;'  # overwrite the element with the immediate
3170    oneRegImmInst("vmov", "NVmoviD", ("uint64_t",), 2, vmovCode)
3171    oneRegImmInst("vmov", "NVmoviQ", ("uint64_t",), 4, vmovCode)
3172
3173    vorrCode = 'destElem |= imm;'  # OR the immediate into the existing value
    # The trailing True is passed only for the read-modify-write bodies
    # (vorr here, vbic below); presumably it makes oneRegImmInst load the
    # current destination first -- confirm against its definition.
3174    oneRegImmInst("vorr", "NVorriD", ("uint64_t",), 2, vorrCode, True)
3175    oneRegImmInst("vorr", "NVorriQ", ("uint64_t",), 4, vorrCode, True)
3176
3177    vmvnCode = 'destElem = ~imm;'  # overwrite with the complemented immediate
3178    oneRegImmInst("vmvn", "NVmvniD", ("uint64_t",), 2, vmvnCode)
3179    oneRegImmInst("vmvn", "NVmvniQ", ("uint64_t",), 4, vmvnCode)
3180
3181    vbicCode = 'destElem &= ~imm;'  # clear the bits that are set in imm
3182    oneRegImmInst("vbic", "NVbiciD", ("uint64_t",), 2, vbicCode, True)
3183    oneRegImmInst("vbic", "NVbiciQ", ("uint64_t",), 4, vbicCode, True)
3184
3185    vqmovnCode = '''
3186    FPSCR fpscr = (FPSCR)Fpscr;
3187    destElem = srcElem1;
3188    if ((BigElement)destElem != srcElem1) {
3189        fpscr.qc = 1;
3190        destElem = mask(sizeof(Element) * 8 - 1);
3191        if (srcElem1 < 0)
3192            destElem = ~destElem;
3193    }
3194    Fpscr = fpscr;
3195    '''
    # vqmovnCode is the per-element body for "vqmovn", registered for the
    # signed element types.  It assigns srcElem1 to destElem and then checks
    # whether casting destElem back to BigElement reproduces srcElem1.  If it
    # does not, the value did not fit: fpscr.qc is set and destElem becomes
    # mask(sizeof(Element) * 8 - 1) (presumably all bits below the sign bit
    # set, i.e. the largest positive value -- confirm mask()), or the bitwise
    # complement of that when srcElem1 is negative.  Fpscr is read into a
    # local copy first and written back unconditionally at the end.
3196    twoRegNarrowMiscInst("vqmovn", "NVqmovn", smallSignedTypes, vqmovnCode)
3197
3198    vqmovunCode = '''
3199    FPSCR fpscr = (FPSCR)Fpscr;
3200    destElem = srcElem1;
3201    if ((BigElement)destElem != srcElem1) {
3202        fpscr.qc = 1;
3203        destElem = mask(sizeof(Element) * 8);
3204    }
3205    Fpscr = fpscr;
3206    '''
    # vqmovunCode is the per-element body registered for the unsigned element
    # types.  It assigns srcElem1 to destElem; if casting destElem back to
    # BigElement does not reproduce srcElem1, fpscr.qc is set and destElem is
    # replaced with mask(sizeof(Element) * 8) (presumably every bit of
    # Element set -- confirm mask()).  There is no negative case here.
    # NOTE(review): this class and NVqmovuns are both registered under the
    # mnemonic "vqmovun"; confirm that is the intended name for this form.
3207    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3208            smallUnsignedTypes, vqmovunCode)
3209
3210    vqmovunsCode = '''
3211    FPSCR fpscr = (FPSCR)Fpscr;
3212    destElem = srcElem1;
3213    if (srcElem1 < 0 ||
3214            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3215        fpscr.qc = 1;
3216        destElem = mask(sizeof(Element) * 8);
3217        if (srcElem1 < 0)
3218            destElem = ~destElem;
3219    }
3220    Fpscr = fpscr;
3221    '''
    # vqmovunsCode is the per-element body for "vqmovun" registered for the
    # signed element types.  The result is treated as out of range when
    # srcElem1 is negative, or when the low sizeof(Element) * 8 bits of
    # destElem (after masking) do not reproduce srcElem1.  In that case
    # fpscr.qc is set and destElem becomes mask(sizeof(Element) * 8), or the
    # bitwise complement of that for a negative source (presumably all-ones
    # and zero respectively -- confirm mask()).  Fpscr is written back
    # unconditionally.
3222    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3223            smallSignedTypes, vqmovunsCode)
3224
3225    def buildVext(name, Name, types, rCount, op):
3226        global header_output, exec_output
3227        eWalkCode = '''
3228        RegVect srcReg1, srcReg2, destReg;
3229        '''
3230        for reg in range(rCount):
3231            eWalkCode += simdEnabledCheckCode + '''
3232                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3233                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3234            ''' % { "reg" : reg }
3235        eWalkCode += op
3236        for reg in range(rCount):
3237            eWalkCode += '''
3238            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3239            ''' % { "reg" : reg }
3240        iop = InstObjParams(name, Name,
3241                            "RegRegRegImmOp",
3242                            { "code": eWalkCode,
3243                              "r_count": rCount,
3244                              "predicate_test": predicateTest }, [])
3245        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3246        exec_output += NeonEqualRegExecute.subst(iop)
3247        for type in types:
3248            substDict = { "targs" : type,
3249                          "class_name" : Name }
3250            exec_output += NeonExecDeclare.subst(substDict)
3251
3252    vextCode = '''
3253        for (unsigned i = 0; i < eCount; i++) {
3254            unsigned index = i + imm;
3255            if (index < eCount) {
3256                destReg.elements[i] = srcReg1.elements[index];
3257            } else {
3258                index -= eCount;
3259                assert(index < eCount);
3260                destReg.elements[i] = srcReg2.elements[index];
3261            }
3262        }
3263    '''
    # vextCode is the C++ snippet handed to buildVext as `op`.  Element i of
    # destReg is taken from position i + imm of the sequence formed by
    # srcReg1 followed by srcReg2: positions below eCount come from srcReg1,
    # the rest from srcReg2 after subtracting eCount.  The assert guards
    # against an imm so large that the index would run past srcReg2 as well.
    # Only uint8_t is registered as an element type.
3264    buildVext("vext", "NVextD", ("uint8_t",), 2, vextCode)
3265    buildVext("vext", "NVextQ", ("uint8_t",), 4, vextCode)
3266
3267    def buildVtbxl(name, Name, length, isVtbl):
3268        global header_output, decoder_output, exec_output
3269        code = '''
3270            union
3271            {
3272                uint8_t bytes[32];
3273                FloatRegBits regs[8];
3274            } table;
3275
3276            union
3277            {
3278                uint8_t bytes[8];
3279                FloatRegBits regs[2];
3280            } destReg, srcReg2;
3281
3282            const unsigned length = %(length)d;
3283            const bool isVtbl = %(isVtbl)s;
3284
3285            srcReg2.regs[0] = htog(FpOp2P0.uw);
3286            srcReg2.regs[1] = htog(FpOp2P1.uw);
3287
3288            destReg.regs[0] = htog(FpDestP0.uw);
3289            destReg.regs[1] = htog(FpDestP1.uw);
3290        ''' % { "length" : length, "isVtbl" : isVtbl }
3291        for reg in range(8):
3292            if reg < length * 2:
3293                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3294                        { "reg" : reg }
3295            else:
3296                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3297        code += '''
3298        for (unsigned i = 0; i < sizeof(destReg); i++) {
3299            uint8_t index = srcReg2.bytes[i];
3300            if (index < 8 * length) {
3301                destReg.bytes[i] = table.bytes[index];
3302            } else {
3303                if (isVtbl)
3304                    destReg.bytes[i] = 0;
3305                // else destReg.bytes[i] unchanged
3306            }
3307        }
3308
3309        FpDestP0.uw = gtoh(destReg.regs[0]);
3310        FpDestP1.uw = gtoh(destReg.regs[1]);
3311        '''
3312        iop = InstObjParams(name, Name,
3313                            "RegRegRegOp",
3314                            { "code": code,
3315                              "predicate_test": predicateTest }, [])
3316        header_output += RegRegRegOpDeclare.subst(iop)
3317        decoder_output += RegRegRegOpConstructor.subst(iop)
3318        exec_output += PredOpExecute.subst(iop)
3319
3320    buildVtbxl("vtbl", "NVtbl1", 1, "true")  # vtbl forms, lengths 1 to 4
    # The last argument is a string, not a Python bool: buildVtbxl splices it
    # into the generated C++ with %(isVtbl)s.  With "true", an out-of-range
    # index byte writes 0 to the destination byte.
3321    buildVtbxl("vtbl", "NVtbl2", 2, "true")
3322    buildVtbxl("vtbl", "NVtbl3", 3, "true")
3323    buildVtbxl("vtbl", "NVtbl4", 4, "true")
3324
    # vtbx forms, lengths 1 to 4: with "false", an out-of-range index byte
    # leaves the destination byte unchanged.
3325    buildVtbxl("vtbx", "NVtbx1", 1, "false")
3326    buildVtbxl("vtbx", "NVtbx2", 2, "false")
3327    buildVtbxl("vtbx", "NVtbx3", 3, "false")
3328    buildVtbxl("vtbx", "NVtbx4", 4, "false")
3329}};
3330