neon.isa revision 8782:10c9297e14d5
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        if (imm < 0 && imm >= eCount) {
875            if (FullSystem)
876                fault = new UndefinedInstruction;
877            else
878                fault = new UndefinedInstruction(false, mnemonic);
879        } else {
880            for (unsigned i = 0; i < eCount; i++) {
881                Element srcElem1 = gtoh(srcReg1.elements[i]);
882                Element srcElem2 = gtoh(srcReg2.elements[imm]);
883                Element destElem;
884                %(readDest)s
885                %(op)s
886                destReg.elements[i] = htog(destElem);
887            }
888        }
889        ''' % { "op" : op, "readDest" : readDestCode }
890        for reg in range(rCount):
891            eWalkCode += '''
892            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
893            ''' % { "reg" : reg }
894        iop = InstObjParams(name, Name,
895                            "RegRegRegImmOp",
896                            { "code": eWalkCode,
897                              "r_count": rCount,
898                              "predicate_test": predicateTest,
899                              "op_class": opClass }, [])
900        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
901        exec_output += NeonEqualRegExecute.subst(iop)
902        for type in types:
903            substDict = { "targs" : type,
904                          "class_name" : Name }
905            exec_output += NeonExecDeclare.subst(substDict)
906
907    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
908        global header_output, exec_output
909        rCount = 2
910        eWalkCode = simdEnabledCheckCode + '''
911        RegVect srcReg1, srcReg2;
912        BigRegVect destReg;
913        '''
914        for reg in range(rCount):
915            eWalkCode += '''
916                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
917                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
918            ''' % { "reg" : reg }
919        if readDest:
920            for reg in range(2 * rCount):
921                eWalkCode += '''
922                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
923                ''' % { "reg" : reg }
924        readDestCode = ''
925        if readDest:
926            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
927        eWalkCode += '''
928        if (imm < 0 && imm >= eCount) {
929            if (FullSystem)
930                fault = new UndefinedInstruction;
931            else
932                fault = new UndefinedInstruction(false, mnemonic);
933        } else {
934            for (unsigned i = 0; i < eCount; i++) {
935                Element srcElem1 = gtoh(srcReg1.elements[i]);
936                Element srcElem2 = gtoh(srcReg2.elements[imm]);
937                BigElement destElem;
938                %(readDest)s
939                %(op)s
940                destReg.elements[i] = htog(destElem);
941            }
942        }
943        ''' % { "op" : op, "readDest" : readDestCode }
944        for reg in range(2 * rCount):
945            eWalkCode += '''
946            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
947            ''' % { "reg" : reg }
948        iop = InstObjParams(name, Name,
949                            "RegRegRegImmOp",
950                            { "code": eWalkCode,
951                              "r_count": rCount,
952                              "predicate_test": predicateTest,
953                              "op_class": opClass }, [])
954        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
955        exec_output += NeonUnequalRegExecute.subst(iop)
956        for type in types:
957            substDict = { "targs" : type,
958                          "class_name" : Name }
959            exec_output += NeonExecDeclare.subst(substDict)
960
961    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
962        global header_output, exec_output
963        eWalkCode = simdEnabledCheckCode + '''
964        typedef FloatReg FloatVect[rCount];
965        FloatVect srcRegs1, srcRegs2, destRegs;
966        '''
967        for reg in range(rCount):
968            eWalkCode += '''
969                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
970                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
971            ''' % { "reg" : reg }
972            if readDest:
973                eWalkCode += '''
974                    destRegs[%(reg)d] = FpDestP%(reg)d;
975                ''' % { "reg" : reg }
976        readDestCode = ''
977        if readDest:
978            readDestCode = 'destReg = destRegs[i];'
979        eWalkCode += '''
980        if (imm < 0 && imm >= eCount) {
981            if (FullSystem)
982                fault = new UndefinedInstruction;
983            else
984                fault = new UndefinedInstruction(false, mnemonic);
985        } else {
986            for (unsigned i = 0; i < rCount; i++) {
987                FloatReg srcReg1 = srcRegs1[i];
988                FloatReg srcReg2 = srcRegs2[imm];
989                FloatReg destReg;
990                %(readDest)s
991                %(op)s
992                destRegs[i] = destReg;
993            }
994        }
995        ''' % { "op" : op, "readDest" : readDestCode }
996        for reg in range(rCount):
997            eWalkCode += '''
998            FpDestP%(reg)d = destRegs[%(reg)d];
999            ''' % { "reg" : reg }
1000        iop = InstObjParams(name, Name,
1001                            "FpRegRegRegImmOp",
1002                            { "code": eWalkCode,
1003                              "r_count": rCount,
1004                              "predicate_test": predicateTest,
1005                              "op_class": opClass }, [])
1006        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1007        exec_output += NeonEqualRegExecute.subst(iop)
1008        for type in types:
1009            substDict = { "targs" : type,
1010                          "class_name" : Name }
1011            exec_output += NeonExecDeclare.subst(substDict)
1012
1013    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1014            readDest=False, toInt=False, fromInt=False):
1015        global header_output, exec_output
1016        eWalkCode = simdEnabledCheckCode + '''
1017        RegVect srcRegs1, destRegs;
1018        '''
1019        for reg in range(rCount):
1020            eWalkCode += '''
1021                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1022            ''' % { "reg" : reg }
1023            if readDest:
1024                eWalkCode += '''
1025                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1026                ''' % { "reg" : reg }
1027        readDestCode = ''
1028        if readDest:
1029            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1030            if toInt:
1031                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1032        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1033        if fromInt:
1034            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1035        declDest = 'Element destElem;'
1036        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1037        if toInt:
1038            declDest = 'FloatRegBits destReg;'
1039            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1040        eWalkCode += '''
1041        for (unsigned i = 0; i < eCount; i++) {
1042            %(readOp)s
1043            %(declDest)s
1044            %(readDest)s
1045            %(op)s
1046            %(writeDest)s
1047        }
1048        ''' % { "readOp" : readOpCode,
1049                "declDest" : declDest,
1050                "readDest" : readDestCode,
1051                "op" : op,
1052                "writeDest" : writeDestCode }
1053        for reg in range(rCount):
1054            eWalkCode += '''
1055            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1056            ''' % { "reg" : reg }
1057        iop = InstObjParams(name, Name,
1058                            "RegRegImmOp",
1059                            { "code": eWalkCode,
1060                              "r_count": rCount,
1061                              "predicate_test": predicateTest,
1062                              "op_class": opClass }, [])
1063        header_output += NeonRegRegImmOpDeclare.subst(iop)
1064        exec_output += NeonEqualRegExecute.subst(iop)
1065        for type in types:
1066            substDict = { "targs" : type,
1067                          "class_name" : Name }
1068            exec_output += NeonExecDeclare.subst(substDict)
1069
1070    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1071        global header_output, exec_output
1072        eWalkCode = simdEnabledCheckCode + '''
1073        BigRegVect srcReg1;
1074        RegVect destReg;
1075        '''
1076        for reg in range(4):
1077            eWalkCode += '''
1078                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1079            ''' % { "reg" : reg }
1080        if readDest:
1081            for reg in range(2):
1082                eWalkCode += '''
1083                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1084                ''' % { "reg" : reg }
1085        readDestCode = ''
1086        if readDest:
1087            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1088        eWalkCode += '''
1089        for (unsigned i = 0; i < eCount; i++) {
1090            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1091            Element destElem;
1092            %(readDest)s
1093            %(op)s
1094            destReg.elements[i] = htog(destElem);
1095        }
1096        ''' % { "op" : op, "readDest" : readDestCode }
1097        for reg in range(2):
1098            eWalkCode += '''
1099            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1100            ''' % { "reg" : reg }
1101        iop = InstObjParams(name, Name,
1102                            "RegRegImmOp",
1103                            { "code": eWalkCode,
1104                              "r_count": 2,
1105                              "predicate_test": predicateTest,
1106                              "op_class": opClass }, [])
1107        header_output += NeonRegRegImmOpDeclare.subst(iop)
1108        exec_output += NeonUnequalRegExecute.subst(iop)
1109        for type in types:
1110            substDict = { "targs" : type,
1111                          "class_name" : Name }
1112            exec_output += NeonExecDeclare.subst(substDict)
1113
1114    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1115        global header_output, exec_output
1116        eWalkCode = simdEnabledCheckCode + '''
1117        RegVect srcReg1;
1118        BigRegVect destReg;
1119        '''
1120        for reg in range(2):
1121            eWalkCode += '''
1122                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1123            ''' % { "reg" : reg }
1124        if readDest:
1125            for reg in range(4):
1126                eWalkCode += '''
1127                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1128                ''' % { "reg" : reg }
1129        readDestCode = ''
1130        if readDest:
1131            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1132        eWalkCode += '''
1133        for (unsigned i = 0; i < eCount; i++) {
1134            Element srcElem1 = gtoh(srcReg1.elements[i]);
1135            BigElement destElem;
1136            %(readDest)s
1137            %(op)s
1138            destReg.elements[i] = htog(destElem);
1139        }
1140        ''' % { "op" : op, "readDest" : readDestCode }
1141        for reg in range(4):
1142            eWalkCode += '''
1143            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1144            ''' % { "reg" : reg }
1145        iop = InstObjParams(name, Name,
1146                            "RegRegImmOp",
1147                            { "code": eWalkCode,
1148                              "r_count": 2,
1149                              "predicate_test": predicateTest,
1150                              "op_class": opClass }, [])
1151        header_output += NeonRegRegImmOpDeclare.subst(iop)
1152        exec_output += NeonUnequalRegExecute.subst(iop)
1153        for type in types:
1154            substDict = { "targs" : type,
1155                          "class_name" : Name }
1156            exec_output += NeonExecDeclare.subst(substDict)
1157
1158    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1159        global header_output, exec_output
1160        eWalkCode = simdEnabledCheckCode + '''
1161        RegVect srcReg1, destReg;
1162        '''
1163        for reg in range(rCount):
1164            eWalkCode += '''
1165                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1166            ''' % { "reg" : reg }
1167            if readDest:
1168                eWalkCode += '''
1169                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1170                ''' % { "reg" : reg }
1171        readDestCode = ''
1172        if readDest:
1173            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1174        eWalkCode += '''
1175        for (unsigned i = 0; i < eCount; i++) {
1176            unsigned j = i;
1177            Element srcElem1 = gtoh(srcReg1.elements[i]);
1178            Element destElem;
1179            %(readDest)s
1180            %(op)s
1181            destReg.elements[j] = htog(destElem);
1182        }
1183        ''' % { "op" : op, "readDest" : readDestCode }
1184        for reg in range(rCount):
1185            eWalkCode += '''
1186            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1187            ''' % { "reg" : reg }
1188        iop = InstObjParams(name, Name,
1189                            "RegRegOp",
1190                            { "code": eWalkCode,
1191                              "r_count": rCount,
1192                              "predicate_test": predicateTest,
1193                              "op_class": opClass }, [])
1194        header_output += NeonRegRegOpDeclare.subst(iop)
1195        exec_output += NeonEqualRegExecute.subst(iop)
1196        for type in types:
1197            substDict = { "targs" : type,
1198                          "class_name" : Name }
1199            exec_output += NeonExecDeclare.subst(substDict)
1200
1201    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1202        global header_output, exec_output
1203        eWalkCode = simdEnabledCheckCode + '''
1204        RegVect srcReg1, destReg;
1205        '''
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1209            ''' % { "reg" : reg }
1210            if readDest:
1211                eWalkCode += '''
1212                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1213                ''' % { "reg" : reg }
1214        readDestCode = ''
1215        if readDest:
1216            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1217        eWalkCode += '''
1218        for (unsigned i = 0; i < eCount; i++) {
1219            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1220            Element destElem;
1221            %(readDest)s
1222            %(op)s
1223            destReg.elements[i] = htog(destElem);
1224        }
1225        ''' % { "op" : op, "readDest" : readDestCode }
1226        for reg in range(rCount):
1227            eWalkCode += '''
1228            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1229            ''' % { "reg" : reg }
1230        iop = InstObjParams(name, Name,
1231                            "RegRegImmOp",
1232                            { "code": eWalkCode,
1233                              "r_count": rCount,
1234                              "predicate_test": predicateTest,
1235                              "op_class": opClass }, [])
1236        header_output += NeonRegRegImmOpDeclare.subst(iop)
1237        exec_output += NeonEqualRegExecute.subst(iop)
1238        for type in types:
1239            substDict = { "targs" : type,
1240                          "class_name" : Name }
1241            exec_output += NeonExecDeclare.subst(substDict)
1242
1243    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1244        global header_output, exec_output
1245        eWalkCode = simdEnabledCheckCode + '''
1246        RegVect srcReg1, destReg;
1247        '''
1248        for reg in range(rCount):
1249            eWalkCode += '''
1250                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1251                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1252            ''' % { "reg" : reg }
1253            if readDest:
1254                eWalkCode += '''
1255                ''' % { "reg" : reg }
1256        readDestCode = ''
1257        if readDest:
1258            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1259        eWalkCode += op
1260        for reg in range(rCount):
1261            eWalkCode += '''
1262            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1263            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1264            ''' % { "reg" : reg }
1265        iop = InstObjParams(name, Name,
1266                            "RegRegOp",
1267                            { "code": eWalkCode,
1268                              "r_count": rCount,
1269                              "predicate_test": predicateTest,
1270                              "op_class": opClass }, [])
1271        header_output += NeonRegRegOpDeclare.subst(iop)
1272        exec_output += NeonEqualRegExecute.subst(iop)
1273        for type in types:
1274            substDict = { "targs" : type,
1275                          "class_name" : Name }
1276            exec_output += NeonExecDeclare.subst(substDict)
1277
1278    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1279            readDest=False, toInt=False):
1280        global header_output, exec_output
1281        eWalkCode = simdEnabledCheckCode + '''
1282        typedef FloatReg FloatVect[rCount];
1283        FloatVect srcRegs1;
1284        '''
1285        if toInt:
1286            eWalkCode += 'RegVect destRegs;\n'
1287        else:
1288            eWalkCode += 'FloatVect destRegs;\n'
1289        for reg in range(rCount):
1290            eWalkCode += '''
1291                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1292            ''' % { "reg" : reg }
1293            if readDest:
1294                if toInt:
1295                    eWalkCode += '''
1296                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1297                    ''' % { "reg" : reg }
1298                else:
1299                    eWalkCode += '''
1300                        destRegs[%(reg)d] = FpDestP%(reg)d;
1301                    ''' % { "reg" : reg }
1302        readDestCode = ''
1303        if readDest:
1304            readDestCode = 'destReg = destRegs[i];'
1305        destType = 'FloatReg'
1306        writeDest = 'destRegs[r] = destReg;'
1307        if toInt:
1308            destType = 'FloatRegBits'
1309            writeDest = 'destRegs.regs[r] = destReg;'
1310        eWalkCode += '''
1311        for (unsigned r = 0; r < rCount; r++) {
1312            FloatReg srcReg1 = srcRegs1[r];
1313            %(destType)s destReg;
1314            %(readDest)s
1315            %(op)s
1316            %(writeDest)s
1317        }
1318        ''' % { "op" : op,
1319                "readDest" : readDestCode,
1320                "destType" : destType,
1321                "writeDest" : writeDest }
1322        for reg in range(rCount):
1323            if toInt:
1324                eWalkCode += '''
1325                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1326                ''' % { "reg" : reg }
1327            else:
1328                eWalkCode += '''
1329                FpDestP%(reg)d = destRegs[%(reg)d];
1330                ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "FpRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": rCount,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegOpDeclare.subst(iop)
1338        exec_output += NeonEqualRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1345        global header_output, exec_output
1346        eWalkCode = simdEnabledCheckCode + '''
1347        RegVect srcRegs;
1348        BigRegVect destReg;
1349        '''
1350        for reg in range(rCount):
1351            eWalkCode += '''
1352                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1353            ''' % { "reg" : reg }
1354            if readDest:
1355                eWalkCode += '''
1356                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1357                ''' % { "reg" : reg }
1358        readDestCode = ''
1359        if readDest:
1360            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1361        eWalkCode += '''
1362        for (unsigned i = 0; i < eCount / 2; i++) {
1363            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1364            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1365            BigElement destElem;
1366            %(readDest)s
1367            %(op)s
1368            destReg.elements[i] = htog(destElem);
1369        }
1370        ''' % { "op" : op, "readDest" : readDestCode }
1371        for reg in range(rCount):
1372            eWalkCode += '''
1373            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1374            ''' % { "reg" : reg }
1375        iop = InstObjParams(name, Name,
1376                            "RegRegOp",
1377                            { "code": eWalkCode,
1378                              "r_count": rCount,
1379                              "predicate_test": predicateTest,
1380                              "op_class": opClass }, [])
1381        header_output += NeonRegRegOpDeclare.subst(iop)
1382        exec_output += NeonUnequalRegExecute.subst(iop)
1383        for type in types:
1384            substDict = { "targs" : type,
1385                          "class_name" : Name }
1386            exec_output += NeonExecDeclare.subst(substDict)
1387
1388    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1389        global header_output, exec_output
1390        eWalkCode = simdEnabledCheckCode + '''
1391        BigRegVect srcReg1;
1392        RegVect destReg;
1393        '''
1394        for reg in range(4):
1395            eWalkCode += '''
1396                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1397            ''' % { "reg" : reg }
1398        if readDest:
1399            for reg in range(2):
1400                eWalkCode += '''
1401                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1402                ''' % { "reg" : reg }
1403        readDestCode = ''
1404        if readDest:
1405            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1406        eWalkCode += '''
1407        for (unsigned i = 0; i < eCount; i++) {
1408            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1409            Element destElem;
1410            %(readDest)s
1411            %(op)s
1412            destReg.elements[i] = htog(destElem);
1413        }
1414        ''' % { "op" : op, "readDest" : readDestCode }
1415        for reg in range(2):
1416            eWalkCode += '''
1417            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1418            ''' % { "reg" : reg }
1419        iop = InstObjParams(name, Name,
1420                            "RegRegOp",
1421                            { "code": eWalkCode,
1422                              "r_count": 2,
1423                              "predicate_test": predicateTest,
1424                              "op_class": opClass }, [])
1425        header_output += NeonRegRegOpDeclare.subst(iop)
1426        exec_output += NeonUnequalRegExecute.subst(iop)
1427        for type in types:
1428            substDict = { "targs" : type,
1429                          "class_name" : Name }
1430            exec_output += NeonExecDeclare.subst(substDict)
1431
1432    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1433        global header_output, exec_output
1434        eWalkCode = simdEnabledCheckCode + '''
1435        RegVect destReg;
1436        '''
1437        if readDest:
1438            for reg in range(rCount):
1439                eWalkCode += '''
1440                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1441                ''' % { "reg" : reg }
1442        readDestCode = ''
1443        if readDest:
1444            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1445        eWalkCode += '''
1446        for (unsigned i = 0; i < eCount; i++) {
1447            Element destElem;
1448            %(readDest)s
1449            %(op)s
1450            destReg.elements[i] = htog(destElem);
1451        }
1452        ''' % { "op" : op, "readDest" : readDestCode }
1453        for reg in range(rCount):
1454            eWalkCode += '''
1455            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1456            ''' % { "reg" : reg }
1457        iop = InstObjParams(name, Name,
1458                            "RegImmOp",
1459                            { "code": eWalkCode,
1460                              "r_count": rCount,
1461                              "predicate_test": predicateTest,
1462                              "op_class": opClass }, [])
1463        header_output += NeonRegImmOpDeclare.subst(iop)
1464        exec_output += NeonEqualRegExecute.subst(iop)
1465        for type in types:
1466            substDict = { "targs" : type,
1467                          "class_name" : Name }
1468            exec_output += NeonExecDeclare.subst(substDict)
1469
1470    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1471        global header_output, exec_output
1472        eWalkCode = simdEnabledCheckCode + '''
1473        RegVect srcReg1;
1474        BigRegVect destReg;
1475        '''
1476        for reg in range(2):
1477            eWalkCode += '''
1478                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1479            ''' % { "reg" : reg }
1480        if readDest:
1481            for reg in range(4):
1482                eWalkCode += '''
1483                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1484                ''' % { "reg" : reg }
1485        readDestCode = ''
1486        if readDest:
1487            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1488        eWalkCode += '''
1489        for (unsigned i = 0; i < eCount; i++) {
1490            Element srcElem1 = gtoh(srcReg1.elements[i]);
1491            BigElement destElem;
1492            %(readDest)s
1493            %(op)s
1494            destReg.elements[i] = htog(destElem);
1495        }
1496        ''' % { "op" : op, "readDest" : readDestCode }
1497        for reg in range(4):
1498            eWalkCode += '''
1499            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1500            ''' % { "reg" : reg }
1501        iop = InstObjParams(name, Name,
1502                            "RegRegOp",
1503                            { "code": eWalkCode,
1504                              "r_count": 2,
1505                              "predicate_test": predicateTest,
1506                              "op_class": opClass }, [])
1507        header_output += NeonRegRegOpDeclare.subst(iop)
1508        exec_output += NeonUnequalRegExecute.subst(iop)
1509        for type in types:
1510            substDict = { "targs" : type,
1511                          "class_name" : Name }
1512            exec_output += NeonExecDeclare.subst(substDict)
1513
1514    vhaddCode = '''
1515        Element carryBit =
1516            (((unsigned)srcElem1 & 0x1) +
1517             ((unsigned)srcElem2 & 0x1)) >> 1;
1518        // Use division instead of a shift to ensure the sign extension works
1519        // right. The compiler will figure out if it can be a shift. Mask the
1520        // inputs so they get truncated correctly.
1521        destElem = (((srcElem1 & ~(Element)1) / 2) +
1522                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1523    '''
1524    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1525    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
1527    vrhaddCode = '''
1528        Element carryBit =
1529            (((unsigned)srcElem1 & 0x1) +
1530             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531        // Use division instead of a shift to ensure the sign extension works
1532        // right. The compiler will figure out if it can be a shift. Mask the
1533        // inputs so they get truncated correctly.
1534        destElem = (((srcElem1 & ~(Element)1) / 2) +
1535                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1536    '''
1537    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1538    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# vhsub: element-wise halving subtract. Each source has bit 0 cleared and is
# divided by two; the halves are subtracted and then "barrowBit" (sic -- the
# borrow bit) is subtracted. The borrow is 1 only when the difference of the
# two low bits is negative, i.e. src1's low bit is 0 and src2's is 1.
# Registered under the VhsubD and VhsubQ class names.
1540    vhsubCode = '''
1541        Element barrowBit =
1542            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543        // Use division instead of a shift to ensure the sign extension works
1544        // right. The compiler will figure out if it can be a shift. Mask the
1545        // inputs so they get truncated correctly.
1546        destElem = (((srcElem1 & ~(Element)1) / 2) -
1547                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1548    '''
1549    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1550    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Element-wise bitwise operations on two sources. Each snippet is one C++
# assignment to destElem:
#   vand: src1 & src2     vbic: src1 & ~src2
#   vorr: src1 | src2     vorn: src1 | ~src2     veor: src1 ^ src2
# All are registered with unsignedTypes only, under a ...D and a ...Q class
# name each.
1552    vandCode = '''
1553        destElem = srcElem1 & srcElem2;
1554    '''
1555    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1556    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1557
1558    vbicCode = '''
1559        destElem = srcElem1 & ~srcElem2;
1560    '''
1561    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1563
1564    vorrCode = '''
1565        destElem = srcElem1 | srcElem2;
1566    '''
1567    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1569
# vmov reuses vorrCode unchanged but is registered with the SimdMiscOp class.
# Presumably the decoder picks these classes when both source registers are
# the same, making the OR a plain copy -- the decoder is not visible here,
# TODO confirm.
1570    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1571    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1572
1573    vornCode = '''
1574        destElem = srcElem1 | ~srcElem2;
1575    '''
1576    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1578
1579    veorCode = '''
1580        destElem = srcElem1 ^ srcElem2;
1581    '''
1582    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# Bitwise select/insert operations. All three snippets read the previous value
# of destElem as an input; the extra trailing True argument is presumably the
# flag that makes the generated code load the destination first (the helper's
# signature is outside this excerpt, TODO confirm).
#   vbif: keep dest bits where src2 is 1, take src1 bits where src2 is 0.
#   vbit: take src1 bits where src2 is 1, keep dest bits where src2 is 0.
#   vbsl: dest acts as the mask -- src1 bits where dest is 1, src2 bits
#         where dest is 0.
1585    vbifCode = '''
1586        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1587    '''
1588    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1589    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1590    vbitCode = '''
1591        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1592    '''
1593    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1595    vbslCode = '''
1596        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1597    '''
1598    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# Integer vmax / vmin: destElem receives the larger (vmax) or smaller (vmin)
# of the two source elements; on a tie srcElem2 is chosen, which is the same
# value. Both snippets are also reused later in the file for the pairwise
# vpmax / vpmin registrations.
1601    vmaxCode = '''
1602        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1603    '''
1604    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1605    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1606
1607    vminCode = '''
1608        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1609    '''
1610    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# Integer vadd: plain element-wise sum, registered for unsignedTypes only.
# The same snippet is registered again as vpadd with pairwise=True; how the
# helper pairs up the operands in that mode is defined outside this excerpt.
1613    vaddCode = '''
1614        destElem = srcElem1 + srcElem2;
1615    '''
1616    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
1619    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1620                      2, vaddCode, pairwise=True)
1621    threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1622                      4, vaddCode, pairwise=True)
# Widening and narrowing adds.
#   vaddl / vaddw share one snippet: both sources are cast to BigElement
#     before being added, so the sum is formed at the wider type.
#   vaddhn: the sum is formed at BigElement width and shifted right by the
#     bit width of Element, i.e. only the upper part of the sum is kept.
#   vraddhn: as vaddhn, but 1 << (Element bits - 1) is added before the
#     shift, which rounds the discarded lower part.
1623    vaddlwCode = '''
1624        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1625    '''
1626    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1627    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1628    vaddhnCode = '''
1629        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1630                   (sizeof(Element) * 8);
1631    '''
1632    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1633    vraddhnCode = '''
1634        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1635                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1636                   (sizeof(Element) * 8);
1637    '''
1638    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1639
# Integer vsub: plain element-wise difference src1 - src2, registered for
# unsignedTypes only. vsubl / vsubw share a second snippet that casts both
# sources to BigElement before subtracting.
1640    vsubCode = '''
1641        destElem = srcElem1 - srcElem2;
1642    '''
1643    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1644    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1645    vsublwCode = '''
1646        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1647    '''
1648    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1649    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1650
# vqadd (unsigned): saturating add. The wrapped sum is computed first; if it
# is smaller than either source the addition overflowed, so destElem is
# replaced by (Element)(-1) (all ones) and the qc bit of the FPSCR copy is
# set. FpscrQc is read at the start and written back unconditionally.
1651    vqaddUCode = '''
1652        destElem = srcElem1 + srcElem2;
1653        FPSCR fpscr = (FPSCR) FpscrQc;
1654        if (destElem < srcElem1 || destElem < srcElem2) {
1655            destElem = (Element)(-1);
1656            fpscr.qc = 1;
1657        }
1658        FpscrQc = fpscr;
1659    '''
1660    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1661    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Narrowing subtracts.
#   vsubhn: the difference is formed at BigElement width and shifted right by
#     the bit width of Element, keeping only the upper part.
#   vrsubhn: as vsubhn, but 1 << (Element bits - 1) is added before the
#     shift, which rounds the discarded lower part.
1662    vsubhnCode = '''
1663        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1664                   (sizeof(Element) * 8);
1665    '''
1666    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1667    vrsubhnCode = '''
1668        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1669                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1670                   (sizeof(Element) * 8);
1671    '''
1672    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1673
# vqadd (signed): saturating add. Overflow is detected from signs: both
# sources have the same sign and the wrapped sum has the opposite one. On
# overflow destElem is set to 1 << (bits - 1) (the most negative value) and,
# when the wrapped sum was negative (two positive inputs), decremented by one
# to give the most positive value. The qc bit is set in that case; FpscrQc is
# written back unconditionally.
1674    vqaddSCode = '''
1675        destElem = srcElem1 + srcElem2;
1676        FPSCR fpscr = (FPSCR) FpscrQc;
1677        bool negDest = (destElem < 0);
1678        bool negSrc1 = (srcElem1 < 0);
1679        bool negSrc2 = (srcElem2 < 0);
1680        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1681            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1682            if (negDest)
1683                destElem -= 1;
1684            fpscr.qc = 1;
1685        }
1686        FpscrQc = fpscr;
1687    '''
1688    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1689    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1690
# vqsub (unsigned): saturating subtract. The wrapped difference is computed
# first; if it is larger than srcElem1 the subtraction underflowed, so
# destElem is clamped to 0 and the qc bit is set. FpscrQc is written back
# unconditionally.
1691    vqsubUCode = '''
1692        destElem = srcElem1 - srcElem2;
1693        FPSCR fpscr = (FPSCR) FpscrQc;
1694        if (destElem > srcElem1) {
1695            destElem = 0;
1696            fpscr.qc = 1;
1697        }
1698        FpscrQc = fpscr;
1699    '''
1700    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1701    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1702
# vqsub (signed): saturating subtract. Overflow is detected from signs: the
# wrapped difference has a different sign from srcElem1, and srcElem1 is
# negative exactly when srcElem2 is non-negative (the sources have opposite
# signs). On overflow destElem is set to 1 << (bits - 1) (most negative) and
# decremented by one when the wrapped result was negative, giving the most
# positive value. The qc bit is set in that case; FpscrQc is written back
# unconditionally.
1703    vqsubSCode = '''
1704        destElem = srcElem1 - srcElem2;
1705        FPSCR fpscr = (FPSCR) FpscrQc;
1706        bool negDest = (destElem < 0);
1707        bool negSrc1 = (srcElem1 < 0);
1708        bool posSrc2 = (srcElem2 >= 0);
1709        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1710            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1711            if (negDest)
1712                destElem -= 1;
1713            fpscr.qc = 1;
1714        }
1715        FpscrQc = fpscr;
1716    '''
1717    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1718    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1719
# Integer compares. Each snippet writes (Element)(-1) (all ones) to destElem
# when the comparison of srcElem1 against srcElem2 holds and 0 otherwise:
#   vcgt: src1 > src2     vcge: src1 >= src2     vceq: src1 == src2
# vcgt / vcge are registered with allTypes, vceq with unsignedTypes only.
1720    vcgtCode = '''
1721        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1722    '''
1723    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1724    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1725
1726    vcgeCode = '''
1727        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1728    '''
1729    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1730    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1731
1732    vceqCode = '''
1733        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1734    '''
1735    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1736    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1737
# vshl: element-wise shift by a per-element signed count. The count is the low
# byte of srcElem2 reinterpreted as int8_t: a negative count shifts right by
# its magnitude, a non-negative count shifts left. Counts of at least the
# element width produce 0. After a right shift, if ltz(srcElem1) holds but
# ltz(destElem) does not, the bits from (width - 1 - shiftAmt) upward are set
# so the result is sign-extended; shiftAmt is clamped to width - 1 beforehand
# so that fill covers the whole element for oversized counts.
# ltz is defined outside this excerpt; presumably a "less than zero" test that
# is also valid for unsigned element types -- TODO confirm.
1738    vshlCode = '''
1739        int16_t shiftAmt = (int8_t)srcElem2;
1740        if (shiftAmt < 0) {
1741            shiftAmt = -shiftAmt;
1742            if (shiftAmt >= sizeof(Element) * 8) {
1743                shiftAmt = sizeof(Element) * 8 - 1;
1744                destElem = 0;
1745            } else {
1746                destElem = (srcElem1 >> shiftAmt);
1747            }
1748            // Make sure the right shift sign extended when it should.
1749            if (ltz(srcElem1) && !ltz(destElem)) {
1750                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1751                                             1 - shiftAmt));
1752            }
1753        } else {
1754            if (shiftAmt >= sizeof(Element) * 8) {
1755                destElem = 0;
1756            } else {
1757                destElem = srcElem1 << shiftAmt;
1758            }
1759        }
1760    '''
1761    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1762    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1763
# vrshl: rounding shift by a per-element signed count (low byte of srcElem2 as
# int8_t). For a negative count the element is shifted right by the magnitude
# and rBit is added afterwards:
#   * count <= width: rBit is bit (count - 1) of the source, i.e. the most
#     significant bit shifted out;
#   * count > width and ltz(srcElem1): rBit is forced to 1. In that case the
#     sign-extension step below leaves destElem all ones, so adding 1 yields 0.
# Counts of at least the element width start from destElem = 0 with shiftAmt
# clamped to width - 1 for the sign fill. Positive counts shift left (0 when
# the count reaches the width); a zero count copies the source.
# NOTE(review): these are registered as SimdAluOp whereas the plain vshl
# registrations use SimdShiftOp -- confirm the op class is intentional.
1764    vrshlCode = '''
1765        int16_t shiftAmt = (int8_t)srcElem2;
1766        if (shiftAmt < 0) {
1767            shiftAmt = -shiftAmt;
1768            Element rBit = 0;
1769            if (shiftAmt <= sizeof(Element) * 8)
1770                rBit = bits(srcElem1, shiftAmt - 1);
1771            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1772                rBit = 1;
1773            if (shiftAmt >= sizeof(Element) * 8) {
1774                shiftAmt = sizeof(Element) * 8 - 1;
1775                destElem = 0;
1776            } else {
1777                destElem = (srcElem1 >> shiftAmt);
1778            }
1779            // Make sure the right shift sign extended when it should.
1780            if (ltz(srcElem1) && !ltz(destElem)) {
1781                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1782                                             1 - shiftAmt));
1783            }
1784            destElem += rBit;
1785        } else if (shiftAmt > 0) {
1786            if (shiftAmt >= sizeof(Element) * 8) {
1787                destElem = 0;
1788            } else {
1789                destElem = srcElem1 << shiftAmt;
1790            }
1791        } else {
1792            destElem = srcElem1;
1793        }
1794    '''
1795    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1796    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1797
# vqshl (unsigned): saturating shift by a per-element signed count (low byte
# of srcElem2 as int8_t).
#   * negative count: logical right shift by the magnitude; 0 once the
#     magnitude reaches the element width.
#   * positive count: left shift. If the count reaches the element width and
#     the source is non-zero, or if any of the top `count` source bits (the
#     ones that would be shifted out) is set, destElem becomes
#     mask(element width) and the qc bit is set.
#   * zero count: the source is copied.
# FpscrQc is read at the start and written back unconditionally. bits() and
# mask() are helpers defined outside this excerpt.
1798    vqshlUCode = '''
1799        int16_t shiftAmt = (int8_t)srcElem2;
1800        FPSCR fpscr = (FPSCR) FpscrQc;
1801        if (shiftAmt < 0) {
1802            shiftAmt = -shiftAmt;
1803            if (shiftAmt >= sizeof(Element) * 8) {
1804                shiftAmt = sizeof(Element) * 8 - 1;
1805                destElem = 0;
1806            } else {
1807                destElem = (srcElem1 >> shiftAmt);
1808            }
1809        } else if (shiftAmt > 0) {
1810            if (shiftAmt >= sizeof(Element) * 8) {
1811                if (srcElem1 != 0) {
1812                    destElem = mask(sizeof(Element) * 8);
1813                    fpscr.qc = 1;
1814                } else {
1815                    destElem = 0;
1816                }
1817            } else {
1818                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1819                            sizeof(Element) * 8 - shiftAmt)) {
1820                    destElem = mask(sizeof(Element) * 8);
1821                    fpscr.qc = 1;
1822                } else {
1823                    destElem = srcElem1 << shiftAmt;
1824                }
1825            }
1826        } else {
1827            destElem = srcElem1;
1828        }
1829        FpscrQc = fpscr;
1830    '''
1831    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1832    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1833
# vqshl (signed): saturating shift by a per-element signed count (low byte of
# srcElem2 as int8_t).
#   * negative count: right shift by the magnitude (0 once the magnitude
#     reaches the element width), followed by an explicit sign fill when the
#     source is negative but the shifted value is not.
#   * positive count: left shift with saturation. The shift saturates when the
#     count reaches the element width and the source is non-zero, or when the
#     top (count + 1) source bits are not all equal to the sign (all ones for
#     a negative source, all zeros otherwise). The saturated value is
#     mask(width - 1), bit-inverted for a negative source; qc is set.
#   * zero count: the source is copied.
# FpscrQc is read at the start and written back unconditionally.
# NOTE(review): registered as SimdCmpOp while the unsigned vqshl registrations
# use SimdAluOp -- confirm the op class is intentional.
1834    vqshlSCode = '''
1835        int16_t shiftAmt = (int8_t)srcElem2;
1836        FPSCR fpscr = (FPSCR) FpscrQc;
1837        if (shiftAmt < 0) {
1838            shiftAmt = -shiftAmt;
1839            if (shiftAmt >= sizeof(Element) * 8) {
1840                shiftAmt = sizeof(Element) * 8 - 1;
1841                destElem = 0;
1842            } else {
1843                destElem = (srcElem1 >> shiftAmt);
1844            }
1845            // Make sure the right shift sign extended when it should.
1846            if (srcElem1 < 0 && destElem >= 0) {
1847                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1848                                             1 - shiftAmt));
1849            }
1850        } else if (shiftAmt > 0) {
1851            bool sat = false;
1852            if (shiftAmt >= sizeof(Element) * 8) {
1853                if (srcElem1 != 0)
1854                    sat = true;
1855                else
1856                    destElem = 0;
1857            } else {
1858                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1859                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1860                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1861                    sat = true;
1862                } else {
1863                    destElem = srcElem1 << shiftAmt;
1864                }
1865            }
1866            if (sat) {
1867                fpscr.qc = 1;
1868                destElem = mask(sizeof(Element) * 8 - 1);
1869                if (srcElem1 < 0)
1870                    destElem = ~destElem;
1871            }
1872        } else {
1873            destElem = srcElem1;
1874        }
1875        FpscrQc = fpscr;
1876    '''
1877    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1878    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1879
1880    vqrshlUCode = '''
1881        int16_t shiftAmt = (int8_t)srcElem2;
1882        FPSCR fpscr = (FPSCR) FpscrQc;
1883        if (shiftAmt < 0) {
1884            shiftAmt = -shiftAmt;
1885            Element rBit = 0;
1886            if (shiftAmt <= sizeof(Element) * 8)
1887                rBit = bits(srcElem1, shiftAmt - 1);
1888            if (shiftAmt >= sizeof(Element) * 8) {
1889                shiftAmt = sizeof(Element) * 8 - 1;
1890                destElem = 0;
1891            } else {
1892                destElem = (srcElem1 >> shiftAmt);
1893            }
1894            destElem += rBit;
1895        } else {
1896            if (shiftAmt >= sizeof(Element) * 8) {
1897                if (srcElem1 != 0) {
1898                    destElem = mask(sizeof(Element) * 8);
1899                    fpscr.qc = 1;
1900                } else {
1901                    destElem = 0;
1902                }
1903            } else {
1904                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1905                            sizeof(Element) * 8 - shiftAmt)) {
1906                    destElem = mask(sizeof(Element) * 8);
1907                    fpscr.qc = 1;
1908                } else {
1909                    destElem = srcElem1 << shiftAmt;
1910                }
1911            }
1912        }
1913        FpscrQc = fpscr;
1914    '''
1915    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1916    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1917
# vqrshl (signed): saturating rounding shift by a per-element signed count
# (low byte of srcElem2 as int8_t).
#   * negative count: right shift by the magnitude with an explicit sign fill,
#     then rBit is added. rBit is bit (count - 1) of the source when
#     count <= width, and is forced to 1 when count > width and the source is
#     negative (the sign fill then leaves all ones, so the sum is 0).
#   * positive count: left shift with saturation. The shift saturates when the
#     count reaches the element width and the source is non-zero, or when the
#     top (count + 1) source bits are not all equal to the sign. The saturated
#     value is mask(width - 1), bit-inverted for a negative source; qc is set.
#   * zero count: the source is copied.
# FpscrQc is read at the start and written back unconditionally.
1918    vqrshlSCode = '''
1919        int16_t shiftAmt = (int8_t)srcElem2;
1920        FPSCR fpscr = (FPSCR) FpscrQc;
1921        if (shiftAmt < 0) {
1922            shiftAmt = -shiftAmt;
1923            Element rBit = 0;
1924            if (shiftAmt <= sizeof(Element) * 8)
1925                rBit = bits(srcElem1, shiftAmt - 1);
1926            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1927                rBit = 1;
1928            if (shiftAmt >= sizeof(Element) * 8) {
1929                shiftAmt = sizeof(Element) * 8 - 1;
1930                destElem = 0;
1931            } else {
1932                destElem = (srcElem1 >> shiftAmt);
1933            }
1934            // Make sure the right shift sign extended when it should.
1935            if (srcElem1 < 0 && destElem >= 0) {
1936                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1937                                             1 - shiftAmt));
1938            }
1939            destElem += rBit;
1940        } else if (shiftAmt > 0) {
1941            bool sat = false;
1942            if (shiftAmt >= sizeof(Element) * 8) {
1943                if (srcElem1 != 0)
1944                    sat = true;
1945                else
1946                    destElem = 0;
1947            } else {
1948                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1949                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1950                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1951                    sat = true;
1952                } else {
1953                    destElem = srcElem1 << shiftAmt;
1954                }
1955            }
1956            if (sat) {
1957                fpscr.qc = 1;
1958                destElem = mask(sizeof(Element) * 8 - 1);
1959                if (srcElem1 < 0)
1960                    destElem = ~destElem;
1961            }
1962        } else {
1963            destElem = srcElem1;
1964        }
1965        FpscrQc = fpscr;
1966    '''
1967    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1968    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1969
# vaba / vabal: absolute difference and accumulate. The larger source minus
# the smaller one is added onto the existing destElem. vabal casts both
# sources to BigElement before subtracting so the difference is formed at the
# wider type. The trailing True argument is presumably the flag that makes the
# generated code read the destination first (helper signatures are outside
# this excerpt, TODO confirm).
1970    vabaCode = '''
1971        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1972                                            (srcElem2 - srcElem1);
1973    '''
1974    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1975    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1976    vabalCode = '''
1977        destElem += (srcElem1 > srcElem2) ?
1978            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1979            ((BigElement)srcElem2 - (BigElement)srcElem1);
1980    '''
1981    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1982
# vabd / vabdl: absolute difference. destElem is the larger source minus the
# smaller one. vabdl casts both sources to BigElement before subtracting so
# the difference is formed at the wider type.
1983    vabdCode = '''
1984        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1985                                           (srcElem2 - srcElem1);
1986    '''
1987    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1988    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1989    vabdlCode = '''
1990        destElem = (srcElem1 > srcElem2) ?
1991            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1992            ((BigElement)srcElem2 - (BigElement)srcElem1);
1993    '''
1994    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1995
# vtst: bit test. destElem is (Element)(-1) (all ones) when the two sources
# share at least one set bit, i.e. (src1 & src2) is non-zero, and 0 otherwise.
1996    vtstCode = '''
1997        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1998    '''
1999    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2000    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2001
# Integer vmul / vmull: element-wise product. vmull casts both sources to
# BigElement first so the product is formed at the wider type.
2002    vmulCode = '''
2003        destElem = srcElem1 * srcElem2;
2004    '''
2005    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2006    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2007    vmullCode = '''
2008        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2009    '''
2010    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2011
# Integer vmla / vmlal: multiply-accumulate. The product of the sources is
# added to the existing destElem; vmlal casts both sources to BigElement
# before multiplying. The trailing True argument is presumably the flag that
# makes the generated code read the destination first (TODO confirm against
# the helper definitions, which are outside this excerpt).
2012    vmlaCode = '''
2013        destElem = destElem + srcElem1 * srcElem2;
2014    '''
2015    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2016    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2017    vmlalCode = '''
2018        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2019    '''
2020    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2021
2022    vqdmlalCode = '''
2023        FPSCR fpscr = (FPSCR) FpscrQc;
2024        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2025        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2026        Element halfNeg = maxNeg / 2;
2027        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2028            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2029            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2030            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2031            fpscr.qc = 1;
2032        }
2033        bool negPreDest = ltz(destElem);
2034        destElem += midElem;
2035        bool negDest = ltz(destElem);
2036        bool negMid = ltz(midElem);
2037        if (negPreDest == negMid && negMid != negDest) {
2038            destElem = mask(sizeof(BigElement) * 8 - 1);
2039            if (negPreDest)
2040                destElem = ~destElem;
2041            fpscr.qc = 1;
2042        }
2043        FpscrQc = fpscr;
2044    '''
2045    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2046
2047    vqdmlslCode = '''
2048        FPSCR fpscr = (FPSCR) FpscrQc;
2049        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2050        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2051        Element halfNeg = maxNeg / 2;
2052        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2053            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2054            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2055            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2056            fpscr.qc = 1;
2057        }
2058        bool negPreDest = ltz(destElem);
2059        destElem -= midElem;
2060        bool negDest = ltz(destElem);
2061        bool posMid = ltz((BigElement)-midElem);
2062        if (negPreDest == posMid && posMid != negDest) {
2063            destElem = mask(sizeof(BigElement) * 8 - 1);
2064            if (negPreDest)
2065                destElem = ~destElem;
2066            fpscr.qc = 1;
2067        }
2068        FpscrQc = fpscr;
2069    '''
2070    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2071
# vqdmull: saturating doubling multiply (long). destElem is
# 2 * srcElem1 * srcElem2. When both sources equal the value with only the top
# Element bit set, destElem is instead ~(srcElem1 << Element bits) and the qc
# bit is set. FpscrQc is read at the start and written back unconditionally.
# NOTE(review): registered with the SimdMultAccOp class although the snippet
# does not read destElem -- confirm the op class is intentional.
2072    vqdmullCode = '''
2073        FPSCR fpscr = (FPSCR) FpscrQc;
2074        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2075        if (srcElem1 == srcElem2 &&
2076                srcElem1 == (Element)((Element)1 <<
2077                    (Element)(sizeof(Element) * 8 - 1))) {
2078            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2079            fpscr.qc = 1;
2080        }
2081        FpscrQc = fpscr;
2082    '''
2083    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2084
# Integer vmls / vmlsl: multiply-subtract. The product of the sources is
# subtracted from the existing destElem; vmlsl casts both sources to
# BigElement before multiplying. The trailing True argument is presumably the
# flag that makes the generated code read the destination first (TODO confirm
# against the helper definitions, which are outside this excerpt).
2085    vmlsCode = '''
2086        destElem = destElem - srcElem1 * srcElem2;
2087    '''
2088    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2089    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2090    vmlslCode = '''
2091        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2092    '''
2093    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2094
# Polynomial (carry-less) multiply. For every set bit j of srcElem2 a copy of
# srcElem1 shifted left by j is XOR-ed into destElem, which starts at 0, so
# partial products are combined without carries. vmullp casts srcElem1 to
# BigElement before shifting, so no product bits are dropped at Element width.
# Both are registered under the "vmul"/"vmull" mnemonics with distinct class
# names (NVmulp*, Vmullp).
2095    vmulpCode = '''
2096        destElem = 0;
2097        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2098            if (bits(srcElem2, j))
2099                destElem ^= srcElem1 << j;
2100        }
2101    '''
2102    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2103    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2104    vmullpCode = '''
2105        destElem = 0;
2106        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2107            if (bits(srcElem2, j))
2108                destElem ^= (BigElement)srcElem1 << j;
2109        }
2110    '''
2111    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2112
# Integer vpmax / vpmin: the vmaxCode and vminCode snippets defined earlier in
# the file are registered again with pairwise=True. How the helper pairs up
# the operands in that mode is defined outside this excerpt.
2113    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2114    threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2115
2116    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2117    threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2118
# vqdmulh: saturating doubling multiply returning the high half. destElem is
# (2 * srcElem1 * srcElem2) >> Element bits, computed in int64_t. When both
# sources equal the value with only the top bit set, destElem is instead
# ~srcElem1 (every bit except the top one) and the qc bit is set. FpscrQc is
# read at the start and written back unconditionally.
2119    vqdmulhCode = '''
2120        FPSCR fpscr = (FPSCR) FpscrQc;
2121        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2122                   (sizeof(Element) * 8);
2123        if (srcElem1 == srcElem2 &&
2124                srcElem1 == (Element)((Element)1 <<
2125                    (sizeof(Element) * 8 - 1))) {
2126            destElem = ~srcElem1;
2127            fpscr.qc = 1;
2128        }
2129        FpscrQc = fpscr;
2130    '''
2131    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2132    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2133
2134    vqrdmulhCode = '''
2135        FPSCR fpscr = (FPSCR) FpscrQc;
2136        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2137                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2138                   (sizeof(Element) * 8);
2139        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2140        Element halfNeg = maxNeg / 2;
2141        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2142            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2143            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2144            if (destElem < 0) {
2145                destElem = mask(sizeof(Element) * 8 - 1);
2146            } else {
2147                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2148            }
2149            fpscr.qc = 1;
2150        }
2151        FpscrQc = fpscr;
2152    '''
2153    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2154            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2155    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2156            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2157
# Floating-point vmax / vmin and their pairwise forms. Both snippets follow
# the same sequence:
#   1. copy FpscrExc into a local FPSCR;
#   2. call processNans() on the two sources, which yields destReg and sets
#      `done`;
#   3. if `done` is false, recompute destReg with binaryOp() using fpMaxS
#      (vmax) or fpMinS (vmin);
#   4. otherwise, if flushToZero(srcReg1, srcReg2) returns true, set the idc
#      bit;
#   5. write the local FPSCR back to FpscrExc.
# processNans, binaryOp, flushToZero, fpMaxS and fpMinS are defined outside
# this excerpt; the meaning of the `true, true, VfpRoundNearest` arguments is
# not visible here.
# The helper is invoked with ("float",) as the only type and, for vpmax /
# vpmin, with pairwise=True.
2158    vmaxfpCode = '''
2159        FPSCR fpscr = (FPSCR) FpscrExc;
2160        bool done;
2161        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2162        if (!done) {
2163            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2164                               true, true, VfpRoundNearest);
2165        } else if (flushToZero(srcReg1, srcReg2)) {
2166            fpscr.idc = 1;
2167        }
2168        FpscrExc = fpscr;
2169    '''
2170    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2171    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2172
2173    vminfpCode = '''
2174        FPSCR fpscr = (FPSCR) FpscrExc;
2175        bool done;
2176        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2177        if (!done) {
2178            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2179                               true, true, VfpRoundNearest);
2180        } else if (flushToZero(srcReg1, srcReg2)) {
2181            fpscr.idc = 1;
2182        }
2183        FpscrExc = fpscr;
2184    '''
2185    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2186    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2187
2188    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2189                        2, vmaxfpCode, pairwise=True)
2190    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2191                        4, vmaxfpCode, pairwise=True)
2192
2193    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2194                        2, vminfpCode, pairwise=True)
2195    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2196                        4, vminfpCode, pairwise=True)
2197
# Floating-point vadd and pairwise vpadd. The snippet copies FpscrExc into a
# local FPSCR, computes destReg with binaryOp() using fpAddS on the two
# sources, and writes the FPSCR back. binaryOp and fpAddS are defined outside
# this excerpt. The same snippet is registered again as vpadd with
# pairwise=True.
2198    vaddfpCode = '''
2199        FPSCR fpscr = (FPSCR) FpscrExc;
2200        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2201                           true, true, VfpRoundNearest);
2202        FpscrExc = fpscr;
2203    '''
2204    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2205    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2206
2207    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2208                        2, vaddfpCode, pairwise=True)
2209    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2210                        4, vaddfpCode, pairwise=True)
2211
# Floating-point vsub and vmul. Each snippet copies FpscrExc into a local
# FPSCR, computes destReg with binaryOp() on (srcReg1, srcReg2) using fpSubS
# or fpMulS respectively, and writes the FPSCR back. binaryOp and the fp*S
# callables are defined outside this excerpt.
2212    vsubfpCode = '''
2213        FPSCR fpscr = (FPSCR) FpscrExc;
2214        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2215                           true, true, VfpRoundNearest);
2216        FpscrExc = fpscr;
2217    '''
2218    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2219    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2220
2221    vmulfpCode = '''
2222        FPSCR fpscr = (FPSCR) FpscrExc;
2223        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2224                           true, true, VfpRoundNearest);
2225        FpscrExc = fpscr;
2226    '''
2227    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2228    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2229
# Floating-point vmla / vmls, done as two separate binaryOp() steps that share
# one local FPSCR copy:
#   vmla: mid = src1 * src2 (fpMulS), then destReg = mid + destReg (fpAddS).
#   vmls: mid = src1 * src2 (fpMulS), then destReg = destReg - mid (fpSubS).
# The intermediate product is stored in a float between the two calls.
# FpscrExc is read at the start and written back at the end. The trailing True
# argument is presumably the flag that makes the generated code read the
# destination first (helper signature is outside this excerpt, TODO confirm).
2230    vmlafpCode = '''
2231        FPSCR fpscr = (FPSCR) FpscrExc;
2232        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2233                             true, true, VfpRoundNearest);
2234        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2235                           true, true, VfpRoundNearest);
2236        FpscrExc = fpscr;
2237    '''
2238    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2239    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2240
2241    vmlsfpCode = '''
2242        FPSCR fpscr = (FPSCR) FpscrExc;
2243        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2244                             true, true, VfpRoundNearest);
2245        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2246                           true, true, VfpRoundNearest);
2247        FpscrExc = fpscr;
2248    '''
2249    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2250    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2251
2252    vcgtfpCode = '''
2253        FPSCR fpscr = (FPSCR) FpscrExc;
2254        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2255                             true, true, VfpRoundNearest);
2256        destReg = (res == 0) ? -1 : 0;
2257        if (res == 2.0)
2258            fpscr.ioc = 1;
2259        FpscrExc = fpscr;
2260    '''
2261    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2262            2, vcgtfpCode, toInt = True)
2263    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2264            4, vcgtfpCode, toInt = True)
2265
        # VCGE (float, register-register): runs vcgeFunc through binaryOp and
        # maps its result to a lane mask: -1 when res == 0, otherwise 0.
        # A result of 2.0 additionally sets fpscr.ioc.
        # NOTE(review): the meaning of vcgeFunc's return codes is inferred
        # from this use only -- confirm against its definition.
2266    vcgefpCode = '''
2267        FPSCR fpscr = (FPSCR) FpscrExc;
2268        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2269                             true, true, VfpRoundNearest);
2270        destReg = (res == 0) ? -1 : 0;
2271        if (res == 2.0)
2272            fpscr.ioc = 1;
2273        FpscrExc = fpscr;
2274    '''
2275    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2276            2, vcgefpCode, toInt = True)
2277    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2278            4, vcgefpCode, toInt = True)
2279
        # VACGT (float): same shape as the other float compares, but the
        # per-lane predicate is vacgtFunc (presumably the absolute-value
        # greater-than compare -- confirm against its definition).
        # res == 0 yields an all-ones lane (-1), anything else yields 0;
        # res == 2.0 also sets fpscr.ioc.
2280    vacgtfpCode = '''
2281        FPSCR fpscr = (FPSCR) FpscrExc;
2282        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2283                             true, true, VfpRoundNearest);
2284        destReg = (res == 0) ? -1 : 0;
2285        if (res == 2.0)
2286            fpscr.ioc = 1;
2287        FpscrExc = fpscr;
2288    '''
2289    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2290            2, vacgtfpCode, toInt = True)
2291    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2292            4, vacgtfpCode, toInt = True)
2293
        # VACGE (float): per-lane predicate is vacgeFunc (presumably the
        # absolute-value greater-or-equal compare -- confirm against its
        # definition). res == 0 yields an all-ones lane (-1), anything else
        # yields 0; res == 2.0 also sets fpscr.ioc.
2294    vacgefpCode = '''
2295        FPSCR fpscr = (FPSCR) FpscrExc;
2296        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2297                             true, true, VfpRoundNearest);
2298        destReg = (res == 0) ? -1 : 0;
2299        if (res == 2.0)
2300            fpscr.ioc = 1;
2301        FpscrExc = fpscr;
2302    '''
2303    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2304            2, vacgefpCode, toInt = True)
2305    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2306            4, vacgefpCode, toInt = True)
2307
        # VCEQ (float, register-register): runs vceqFunc through binaryOp and
        # maps its result to a lane mask: -1 when res == 0, otherwise 0.
        # A result of 2.0 additionally sets fpscr.ioc.
        # NOTE(review): the meaning of vceqFunc's return codes is inferred
        # from this use only -- confirm against its definition.
2308    vceqfpCode = '''
2309        FPSCR fpscr = (FPSCR) FpscrExc;
2310        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2311                             true, true, VfpRoundNearest);
2312        destReg = (res == 0) ? -1 : 0;
2313        if (res == 2.0)
2314            fpscr.ioc = 1;
2315        FpscrExc = fpscr;
2316    '''
2317    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2318            2, vceqfpCode, toInt = True)
2319    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2320            4, vceqfpCode, toInt = True)
2321
        # VRECPS (float): the whole per-lane computation is delegated to
        # fpRecpsS via binaryOp; this snippet only threads FPSCR state through
        # FpscrExc. No trailing True here: destReg is written, never read.
2322    vrecpsCode = '''
2323        FPSCR fpscr = (FPSCR) FpscrExc;
2324        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2325                           true, true, VfpRoundNearest);
2326        FpscrExc = fpscr;
2327    '''
2328    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2329    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2330
        # VRSQRTS (float): the whole per-lane computation is delegated to
        # fpRSqrtsS via binaryOp; this snippet only threads FPSCR state
        # through FpscrExc.
2331    vrsqrtsCode = '''
2332        FPSCR fpscr = (FPSCR) FpscrExc;
2333        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2334                           true, true, VfpRoundNearest);
2335        FpscrExc = fpscr;
2336    '''
2337    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2338    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2339
        # VABD (float): absolute difference. srcReg1 - srcReg2 is computed
        # through binaryOp (fpSubS), then fabs() is applied to the result
        # outside binaryOp.
2340    vabdfpCode = '''
2341        FPSCR fpscr = (FPSCR) FpscrExc;
2342        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2343                             true, true, VfpRoundNearest);
2344        destReg = fabs(mid);
2345        FpscrExc = fpscr;
2346    '''
2347    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2348    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2349
        # Second set of multiply-family instantiations, distinguished by an
        # "s" in the class name (VmlasD, Vmlals, VmulsD, ...). They go through
        # the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst helpers and
        # reuse existing snippets; the integer snippets (vmlaCode, vmlalCode,
        # vmlsCode, vmlslCode, vmulCode, vmullCode, vqdmul*Code, vqrdmulhCode)
        # are defined outside this part of the file.
        # NOTE(review): presumably these are the by-scalar forms of the
        # instructions -- confirm against the helper definitions.
        # A trailing True is passed exactly for the accumulating forms.
2350    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2351    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2352    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2353    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2354    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2355
2356    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2357    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2358    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2359    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2360    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2361
2362    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2363    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2364    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2365    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2366    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2367
2368    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2369    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2370    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2371    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2372    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2373    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2374            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2375    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2376            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2377
        # VSHR (immediate): per-element right shift by imm. A shift count of
        # the element width or more is handled explicitly instead of being
        # passed to >>: the result is -1 when ltz(srcElem1) holds, else 0.
2378    vshrCode = '''
2379        if (imm >= sizeof(srcElem1) * 8) {
2380            if (ltz(srcElem1))
2381                destElem = -1;
2382            else
2383                destElem = 0;
2384        } else {
2385            destElem = srcElem1 >> imm;
2386        }
2387    '''
2388    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2389    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2390
2391    vsraCode = '''
2392        Element mid;;
2393        if (imm >= sizeof(srcElem1) * 8) {
2394            mid = ltz(srcElem1) ? -1 : 0;
2395        } else {
2396            mid = srcElem1 >> imm;
2397            if (ltz(srcElem1) && !ltz(mid)) {
2398                mid |= -(mid & ((Element)1 <<
2399                            (sizeof(Element) * 8 - 1 - imm)));
2400            }
2401        }
2402        destElem += mid;
2403    '''
2404    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2405    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2406
        # VRSHR (immediate): rounding right shift. The rounding bit is bit
        # (imm - 1) of the source and is added after shifting.
        # The shift is split into (imm - 1) and then 1 so that no single
        # shift uses the full element width when imm equals it (the first
        # branch only catches imm strictly greater than the width).
        # imm == 0 copies the source unchanged.
2407    vrshrCode = '''
2408        if (imm > sizeof(srcElem1) * 8) {
2409            destElem = 0;
2410        } else if (imm) {
2411            Element rBit = bits(srcElem1, imm - 1);
2412            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2413        } else {
2414            destElem = srcElem1;
2415        }
2416    '''
2417    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2418    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2419
        # VRSRA (immediate): rounding right shift and accumulate. Each branch
        # adds its rounded-shift result to destElem:
        #  - imm > width: adds 0 (destElem is left as it was);
        #  - imm != 0: adds ((src >> (imm - 1)) >> 1) plus the rounding bit,
        #    which is bit (imm - 1) of the source;
        #  - imm == 0: adds the source unchanged.
        # The trailing True accompanies snippets that read destElem
        # (presumably a read-destination flag -- confirm in twoRegShiftInst).
2420    vrsraCode = '''
2421        if (imm > sizeof(srcElem1) * 8) {
2422            destElem += 0;
2423        } else if (imm) {
2424            Element rBit = bits(srcElem1, imm - 1);
2425            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2426        } else {
2427            destElem += srcElem1;
2428        }
2429    '''
2430    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2431    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2432
        # VSRI (immediate): shift right and insert. The shifted source is
        # OR-ed into destElem after destElem has been masked with
        # ~mask(width - imm); assuming mask(n) yields the low n bits set,
        # that keeps only the top imm bits of the old destination.
        # imm >= width leaves destElem unchanged (explicit self-assignment).
        # Instantiated for unsignedTypes only; the trailing True accompanies
        # the read of destElem.
2433    vsriCode = '''
2434        if (imm >= sizeof(Element) * 8)
2435            destElem = destElem;
2436        else
2437            destElem = (srcElem1 >> imm) |
2438                (destElem & ~mask(sizeof(Element) * 8 - imm));
2439    '''
2440    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2441    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2442
        # VSHL (immediate): per-element left shift by imm. For imm >= width
        # the shift is done as (width - 1) followed by 1, so no single shift
        # uses a count equal to or larger than the element width.
2443    vshlCode = '''
2444        if (imm >= sizeof(Element) * 8)
2445            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2446        else
2447            destElem = srcElem1 << imm;
2448    '''
2449    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2450    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2451
        # VSLI (immediate): shift left and insert. The shifted source is
        # OR-ed with destElem & mask(imm); assuming mask(n) yields the low n
        # bits set, that keeps the low imm bits of the old destination.
        # imm >= width leaves destElem unchanged (explicit self-assignment).
        # The trailing True accompanies the read of destElem.
2452    vsliCode = '''
2453        if (imm >= sizeof(Element) * 8)
2454            destElem = destElem;
2455        else
2456            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2457    '''
2458    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2459    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2460
        # VQSHL (immediate), instantiated for signedTypes: saturating left
        # shift. Saturation writes 1 << (width - 1) for a non-positive source
        # or its bitwise complement for a positive one, and sets fpscr.qc
        # (read from / written back to FpscrQc).
        #  - imm >= width: any non-zero source saturates; zero stays zero.
        #  - 0 < imm < width: topBits holds source bits [width-1 : width-1-imm]
        #    (imm + 1 bits); the shift overflowed unless they are all zero or
        #    all one (mask(imm + 1)).
        #  - imm == 0: plain copy.
2461    vqshlCode = '''
2462        FPSCR fpscr = (FPSCR) FpscrQc;
2463        if (imm >= sizeof(Element) * 8) {
2464            if (srcElem1 != 0) {
2465                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2466                if (srcElem1 > 0)
2467                    destElem = ~destElem;
2468                fpscr.qc = 1;
2469            } else {
2470                destElem = 0;
2471            }
2472        } else if (imm) {
2473            destElem = (srcElem1 << imm);
2474            uint64_t topBits = bits((uint64_t)srcElem1,
2475                                    sizeof(Element) * 8 - 1,
2476                                    sizeof(Element) * 8 - 1 - imm);
2477            if (topBits != 0 && topBits != mask(imm + 1)) {
2478                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2479                if (srcElem1 > 0)
2480                    destElem = ~destElem;
2481                fpscr.qc = 1;
2482            }
2483        } else {
2484            destElem = srcElem1;
2485        }
2486        FpscrQc = fpscr;
2487    '''
2488    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2489    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2490
        # Saturating left shift instantiated for unsignedTypes under the
        # mnemonic "vqshlu". Saturation writes mask(width) and sets fpscr.qc.
        #  - imm >= width: any non-zero source saturates.
        #  - 0 < imm < width: the shift overflowed if any of the top imm
        #    source bits [width-1 : width-imm] is set.
        #  - imm == 0: plain copy.
2491    vqshluCode = '''
2492        FPSCR fpscr = (FPSCR) FpscrQc;
2493        if (imm >= sizeof(Element) * 8) {
2494            if (srcElem1 != 0) {
2495                destElem = mask(sizeof(Element) * 8);
2496                fpscr.qc = 1;
2497            } else {
2498                destElem = 0;
2499            }
2500        } else if (imm) {
2501            destElem = (srcElem1 << imm);
2502            uint64_t topBits = bits((uint64_t)srcElem1,
2503                                    sizeof(Element) * 8 - 1,
2504                                    sizeof(Element) * 8 - imm);
2505            if (topBits != 0) {
2506                destElem = mask(sizeof(Element) * 8);
2507                fpscr.qc = 1;
2508            }
2509        } else {
2510            destElem = srcElem1;
2511        }
2512        FpscrQc = fpscr;
2513    '''
2514    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2515    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2516
        # Saturating left shift instantiated for signedTypes under the
        # mnemonic "vqshlus": the source is tested as signed but the result
        # is clamped to an unsigned range. In every branch a negative source
        # produces 0 with fpscr.qc set; a positive overflow produces
        # mask(width) with fpscr.qc set.
        #  - imm >= width: every positive source overflows.
        #  - 0 < imm < width: overflow when any of the top imm source bits
        #    [width-1 : width-imm] is set.
        #  - imm == 0: non-negative sources are copied through.
2517    vqshlusCode = '''
2518        FPSCR fpscr = (FPSCR) FpscrQc;
2519        if (imm >= sizeof(Element) * 8) {
2520            if (srcElem1 < 0) {
2521                destElem = 0;
2522                fpscr.qc = 1;
2523            } else if (srcElem1 > 0) {
2524                destElem = mask(sizeof(Element) * 8);
2525                fpscr.qc = 1;
2526            } else {
2527                destElem = 0;
2528            }
2529        } else if (imm) {
2530            destElem = (srcElem1 << imm);
2531            uint64_t topBits = bits((uint64_t)srcElem1,
2532                                    sizeof(Element) * 8 - 1,
2533                                    sizeof(Element) * 8 - imm);
2534            if (srcElem1 < 0) {
2535                destElem = 0;
2536                fpscr.qc = 1;
2537            } else if (topBits != 0) {
2538                destElem = mask(sizeof(Element) * 8);
2539                fpscr.qc = 1;
2540            }
2541        } else {
2542            if (srcElem1 < 0) {
2543                fpscr.qc = 1;
2544                destElem = 0;
2545            } else {
2546                destElem = srcElem1;
2547            }
2548        }
2549        FpscrQc = fpscr;
2550    '''
2551    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2552    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2553
        # VSHRN: right shift registered through twoRegNarrowShiftInst. The
        # range check uses sizeof(srcElem1), i.e. the width of the source
        # element (presumably the wider type in a narrowing op -- confirm in
        # the helper). A shift of that width or more yields 0.
2554    vshrnCode = '''
2555        if (imm >= sizeof(srcElem1) * 8) {
2556            destElem = 0;
2557        } else {
2558            destElem = srcElem1 >> imm;
2559        }
2560    '''
2561    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2562
        # VRSHRN: rounding right shift registered through
        # twoRegNarrowShiftInst. The rounding bit is bit (imm - 1) of the
        # source; the shift is split into (imm - 1) and then 1 so imm equal
        # to the source width never becomes a single full-width shift.
        # imm > source width yields 0; imm == 0 copies the source.
2563    vrshrnCode = '''
2564        if (imm > sizeof(srcElem1) * 8) {
2565            destElem = 0;
2566        } else if (imm) {
2567            Element rBit = bits(srcElem1, imm - 1);
2568            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2569        } else {
2570            destElem = srcElem1;
2571        }
2572    '''
2573    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2574
        # VQSHRN, instantiated for smallSignedTypes: saturating right shift
        # with a narrower result.
        #  - imm > source width: result 0; fpscr.qc is set unless the source
        #    was 0 or -1.
        #  - imm != 0: the shifted value is held in a BigElement `mid` and
        #    sign-extended by hand from bit (BigElement width - 1 - imm). If
        #    mid does not survive a round trip through Element, the result
        #    saturates to mask(width - 1), or its complement for a negative
        #    source, and fpscr.qc is set.
        #  - imm == 0: plain assignment, no saturation check.
2575    vqshrnCode = '''
2576        FPSCR fpscr = (FPSCR) FpscrQc;
2577        if (imm > sizeof(srcElem1) * 8) {
2578            if (srcElem1 != 0 && srcElem1 != -1)
2579                fpscr.qc = 1;
2580            destElem = 0;
2581        } else if (imm) {
2582            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2583            mid |= -(mid & ((BigElement)1 <<
2584                        (sizeof(BigElement) * 8 - 1 - imm)));
2585            if (mid != (Element)mid) {
2586                destElem = mask(sizeof(Element) * 8 - 1);
2587                if (srcElem1 < 0)
2588                    destElem = ~destElem;
2589                fpscr.qc = 1;
2590            } else {
2591                destElem = mid;
2592            }
2593        } else {
2594            destElem = srcElem1;
2595        }
2596        FpscrQc = fpscr;
2597    '''
2598    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2599
        # Saturating narrowing right shift instantiated for
        # smallUnsignedTypes under the mnemonic "vqshrun" (class NVqshrun).
        #  - imm > source width: result 0; fpscr.qc set for a non-zero source.
        #  - imm != 0: if the shifted BigElement value does not survive a
        #    round trip through Element, the result saturates to mask(width)
        #    and fpscr.qc is set.
        #  - imm == 0: plain assignment, no saturation check.
2600    vqshrunCode = '''
2601        FPSCR fpscr = (FPSCR) FpscrQc;
2602        if (imm > sizeof(srcElem1) * 8) {
2603            if (srcElem1 != 0)
2604                fpscr.qc = 1;
2605            destElem = 0;
2606        } else if (imm) {
2607            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2608            if (mid != (Element)mid) {
2609                destElem = mask(sizeof(Element) * 8);
2610                fpscr.qc = 1;
2611            } else {
2612                destElem = mid;
2613            }
2614        } else {
2615            destElem = srcElem1;
2616        }
2617        FpscrQc = fpscr;
2618    '''
2619    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2620                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2621
        # Saturating narrowing right shift instantiated for smallSignedTypes,
        # also under the mnemonic "vqshrun" (class NVqshruns). The overflow
        # test looks at the bits of the shifted value above the Element width
        # (bits [BigElement width - 1 : Element width]); if any is set the
        # result is 0 for a negative source, mask(width) otherwise, and
        # fpscr.qc is set.
        # imm > source width gives 0 (qc set for a non-zero source);
        # imm == 0 is a plain assignment.
2622    vqshrunsCode = '''
2623        FPSCR fpscr = (FPSCR) FpscrQc;
2624        if (imm > sizeof(srcElem1) * 8) {
2625            if (srcElem1 != 0)
2626                fpscr.qc = 1;
2627            destElem = 0;
2628        } else if (imm) {
2629            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2630            if (bits(mid, sizeof(BigElement) * 8 - 1,
2631                          sizeof(Element) * 8) != 0) {
2632                if (srcElem1 < 0) {
2633                    destElem = 0;
2634                } else {
2635                    destElem = mask(sizeof(Element) * 8);
2636                }
2637                fpscr.qc = 1;
2638            } else {
2639                destElem = mid;
2640            }
2641        } else {
2642            destElem = srcElem1;
2643        }
2644        FpscrQc = fpscr;
2645    '''
2646    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2647                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2648
        # VQRSHRN, instantiated for smallSignedTypes: saturating, rounding
        # right shift with a narrower result.
        #  - imm > source width: result 0; fpscr.qc is set unless the source
        #    was 0 or -1.
        #  - imm != 0: shift by (imm - 1), capture the low bit as the rounding
        #    bit, shift once more, sign-extend by hand from bit
        #    (BigElement width - 1 - imm), then add the rounding bit.
        #  - imm == 0: the source itself is range-checked.
        # In both checked branches, a value that does not survive a round
        # trip through Element saturates to mask(width - 1), or its
        # complement for a negative source, and sets fpscr.qc.
2649    vqrshrnCode = '''
2650        FPSCR fpscr = (FPSCR) FpscrQc;
2651        if (imm > sizeof(srcElem1) * 8) {
2652            if (srcElem1 != 0 && srcElem1 != -1)
2653                fpscr.qc = 1;
2654            destElem = 0;
2655        } else if (imm) {
2656            BigElement mid = (srcElem1 >> (imm - 1));
2657            uint64_t rBit = mid & 0x1;
2658            mid >>= 1;
2659            mid |= -(mid & ((BigElement)1 <<
2660                        (sizeof(BigElement) * 8 - 1 - imm)));
2661            mid += rBit;
2662            if (mid != (Element)mid) {
2663                destElem = mask(sizeof(Element) * 8 - 1);
2664                if (srcElem1 < 0)
2665                    destElem = ~destElem;
2666                fpscr.qc = 1;
2667            } else {
2668                destElem = mid;
2669            }
2670        } else {
2671            if (srcElem1 != (Element)srcElem1) {
2672                destElem = mask(sizeof(Element) * 8 - 1);
2673                if (srcElem1 < 0)
2674                    destElem = ~destElem;
2675                fpscr.qc = 1;
2676            } else {
2677                destElem = srcElem1;
2678            }
2679        }
2680        FpscrQc = fpscr;
2681    '''
2682    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2683                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2684
2685    vqrshrunCode = '''
2686        FPSCR fpscr = (FPSCR) FpscrQc;
2687        if (imm > sizeof(srcElem1) * 8) {
2688            if (srcElem1 != 0)
2689                fpscr.qc = 1;
2690            destElem = 0;
2691        } else if (imm) {
2692            BigElement mid = (srcElem1 >> (imm - 1));
2693            uint64_t rBit = mid & 0x1;
2694            mid >>= 1;
2695            mid += rBit;
2696            if (mid != (Element)mid) {
2697                destElem = mask(sizeof(Element) * 8);
2698                fpscr.qc = 1;
2699            } else {
2700                destElem = mid;
2701            }
2702        } else {
2703            if (srcElem1 != (Element)srcElem1) {
2704                destElem = mask(sizeof(Element) * 8 - 1);
2705                fpscr.qc = 1;
2706            } else {
2707                destElem = srcElem1;
2708            }
2709        }
2710        FpscrQc = fpscr;
2711    '''
2712    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2713                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2714
        # Saturating, rounding, narrowing right shift instantiated for
        # smallSignedTypes, also under the mnemonic "vqrshrun" (class
        # NVqrshruns).
        #  - imm > source width: result 0; fpscr.qc set for a non-zero source.
        #  - imm != 0: shift by (imm - 1), capture the rounding bit, shift
        #    once more, sign-extend by hand, add the rounding bit. If any bit
        #    above the Element width is set in the result, the output is 0
        #    for a negative source or mask(width) otherwise, with fpscr.qc
        #    set.
        #  - imm == 0: a negative source gives 0 with fpscr.qc set; anything
        #    else is copied through.
2715    vqrshrunsCode = '''
2716        FPSCR fpscr = (FPSCR) FpscrQc;
2717        if (imm > sizeof(srcElem1) * 8) {
2718            if (srcElem1 != 0)
2719                fpscr.qc = 1;
2720            destElem = 0;
2721        } else if (imm) {
2722            BigElement mid = (srcElem1 >> (imm - 1));
2723            uint64_t rBit = mid & 0x1;
2724            mid >>= 1;
2725            mid |= -(mid & ((BigElement)1 <<
2726                            (sizeof(BigElement) * 8 - 1 - imm)));
2727            mid += rBit;
2728            if (bits(mid, sizeof(BigElement) * 8 - 1,
2729                          sizeof(Element) * 8) != 0) {
2730                if (srcElem1 < 0) {
2731                    destElem = 0;
2732                } else {
2733                    destElem = mask(sizeof(Element) * 8);
2734                }
2735                fpscr.qc = 1;
2736            } else {
2737                destElem = mid;
2738            }
2739        } else {
2740            if (srcElem1 < 0) {
2741                fpscr.qc = 1;
2742                destElem = 0;
2743            } else {
2744                destElem = srcElem1;
2745            }
2746        }
2747        FpscrQc = fpscr;
2748    '''
2749    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2750                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2751
        # VSHLL: left shift registered through twoRegLongShiftInst. The
        # source is cast to BigElement before shifting, so the shift happens
        # at the wider width; a shift count of the destination width or more
        # yields 0.
2752    vshllCode = '''
2753        if (imm >= sizeof(destElem) * 8) {
2754            destElem = 0;
2755        } else {
2756            destElem = (BigElement)srcElem1 << imm;
2757        }
2758    '''
2759    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2760
        # VMOVL: plain element copy, registered through the same
        # twoRegLongShiftInst helper as vshll (imm is not used by the
        # snippet). Any widening comes from the helper's element types, not
        # from this code.
2761    vmovlCode = '''
2762        destElem = srcElem1;
2763    '''
2764    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2765
        # VCVT float -> fixed point via vfpFpSToFixed(srcElem1, false, false,
        # imm). The second argument is false here and true in the otherwise
        # identical vcvt2sfxCode, so it presumably selects signedness --
        # confirm against vfpFpSToFixed.
        # flushToZero(srcElem1) returning true sets fpscr.idc.
        # The conversion is bracketed by prepFpState/finishVfp and by empty
        # asm statements with "m" constraints on the operands.
        # NOTE(review): the asm statements presumably keep the compiler from
        # moving the conversion across the FP-state setup/teardown -- confirm.
2766    vcvt2ufxCode = '''
2767        FPSCR fpscr = (FPSCR) FpscrExc;
2768        if (flushToZero(srcElem1))
2769            fpscr.idc = 1;
2770        VfpSavedState state = prepFpState(VfpRoundNearest);
2771        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2772        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2773        __asm__ __volatile__("" :: "m" (destReg));
2774        finishVfp(fpscr, state, true);
2775        FpscrExc = fpscr;
2776    '''
2777    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2778            2, vcvt2ufxCode, toInt = True)
2779    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2780            4, vcvt2ufxCode, toInt = True)
2781
        # VCVT float -> fixed point via vfpFpSToFixed(srcElem1, true, false,
        # imm). The second argument is true here and false in the otherwise
        # identical vcvt2ufxCode, so it presumably selects signedness --
        # confirm against vfpFpSToFixed.
        # flushToZero(srcElem1) returning true sets fpscr.idc.
        # NOTE(review): the empty asm statements presumably keep the compiler
        # from moving the conversion across prepFpState/finishVfp -- confirm.
2782    vcvt2sfxCode = '''
2783        FPSCR fpscr = (FPSCR) FpscrExc;
2784        if (flushToZero(srcElem1))
2785            fpscr.idc = 1;
2786        VfpSavedState state = prepFpState(VfpRoundNearest);
2787        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2788        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2789        __asm__ __volatile__("" :: "m" (destReg));
2790        finishVfp(fpscr, state, true);
2791        FpscrExc = fpscr;
2792    '''
2793    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2794            2, vcvt2sfxCode, toInt = True)
2795    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2796            4, vcvt2sfxCode, toInt = True)
2797
        # VCVT fixed point -> float via vfpUFixedToFpS, with imm forwarded as
        # the last argument. The source is read as srcReg1 and the result is
        # written per element (destElem); fromInt = True presumably marks the
        # source register as integer-typed -- confirm in twoRegShiftInst.
        # NOTE(review): the empty asm statements presumably keep the compiler
        # from moving the conversion across prepFpState/finishVfp -- confirm.
2798    vcvtu2fpCode = '''
2799        FPSCR fpscr = (FPSCR) FpscrExc;
2800        VfpSavedState state = prepFpState(VfpRoundNearest);
2801        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2802        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2803        __asm__ __volatile__("" :: "m" (destElem));
2804        finishVfp(fpscr, state, true);
2805        FpscrExc = fpscr;
2806    '''
2807    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2808            2, vcvtu2fpCode, fromInt = True)
2809    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2810            4, vcvtu2fpCode, fromInt = True)
2811
        # VCVT fixed point -> float via vfpSFixedToFpS, with imm forwarded as
        # the last argument. Identical in shape to vcvtu2fpCode except for
        # the conversion helper called. fromInt = True presumably marks the
        # source register as integer-typed -- confirm in twoRegShiftInst.
        # NOTE(review): the empty asm statements presumably keep the compiler
        # from moving the conversion across prepFpState/finishVfp -- confirm.
2812    vcvts2fpCode = '''
2813        FPSCR fpscr = (FPSCR) FpscrExc;
2814        VfpSavedState state = prepFpState(VfpRoundNearest);
2815        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2816        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2817        __asm__ __volatile__("" :: "m" (destElem));
2818        finishVfp(fpscr, state, true);
2819        FpscrExc = fpscr;
2820    '''
2821    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2822            2, vcvts2fpCode, fromInt = True)
2823    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2824            4, vcvts2fpCode, fromInt = True)
2825
        # VCVT single -> half precision. The source element arrives as raw
        # bits and is reinterpreted as a float with bitsToFp; the conversion
        # is done by vcvtFpSFpH, which is passed fpscr.ahp.
        # flushToZero(srcFp1) returning true sets fpscr.idc.
        # Registered as a narrowing op with uint16_t as the element type.
        # NOTE(review): the empty asm statements presumably keep the compiler
        # from moving the conversion across prepFpState/finishVfp -- confirm.
2826    vcvts2hCode = '''
2827        FPSCR fpscr = (FPSCR) FpscrExc;
2828        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2829        if (flushToZero(srcFp1))
2830            fpscr.idc = 1;
2831        VfpSavedState state = prepFpState(VfpRoundNearest);
2832        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2833                                : "m" (srcFp1), "m" (destElem));
2834        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2835                              fpscr.ahp, srcFp1);
2836        __asm__ __volatile__("" :: "m" (destElem));
2837        finishVfp(fpscr, state, true);
2838        FpscrExc = fpscr;
2839    '''
2840    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2841
        # VCVT half -> single precision. vcvtFpHFpS (passed fpscr.ahp) does
        # the conversion and fpToBits turns the float result back into raw
        # bits for the destination element.
        # Registered as a long (widening) op with uint16_t as the element
        # type.
        # NOTE(review): the empty asm statements presumably keep the compiler
        # from moving the conversion across prepFpState/finishVfp -- confirm.
2842    vcvth2sCode = '''
2843        FPSCR fpscr = (FPSCR) FpscrExc;
2844        VfpSavedState state = prepFpState(VfpRoundNearest);
2845        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2846                                : "m" (srcElem1), "m" (destElem));
2847        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2848        __asm__ __volatile__("" :: "m" (destElem));
2849        finishVfp(fpscr, state, true);
2850        FpscrExc = fpscr;
2851    '''
2852    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2853
        # VRSQRTE (integer form): each uint32_t element is passed through
        # unsignedRSqrtEstimate; no FPSCR state is touched here.
2854    vrsqrteCode = '''
2855        destElem = unsignedRSqrtEstimate(srcElem1);
2856    '''
2857    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2858    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2859
        # VRSQRTE (float form): fprSqrtEstimate computes the per-lane result
        # and receives fpscr so it can update it. flushToZero(srcReg1)
        # returning true sets fpscr.idc before the estimate is computed.
2860    vrsqrtefpCode = '''
2861        FPSCR fpscr = (FPSCR) FpscrExc;
2862        if (flushToZero(srcReg1))
2863            fpscr.idc = 1;
2864        destReg = fprSqrtEstimate(fpscr, srcReg1);
2865        FpscrExc = fpscr;
2866    '''
2867    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2868    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2869
        # VRECPE (integer form): each uint32_t element is passed through
        # unsignedRecipEstimate; no FPSCR state is touched here.
2870    vrecpeCode = '''
2871        destElem = unsignedRecipEstimate(srcElem1);
2872    '''
2873    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2874    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2875
        # VRECPE (float form): fpRecipEstimate computes the per-lane result
        # and receives fpscr so it can update it. flushToZero(srcReg1)
        # returning true sets fpscr.idc before the estimate is computed.
2876    vrecpefpCode = '''
2877        FPSCR fpscr = (FPSCR) FpscrExc;
2878        if (flushToZero(srcReg1))
2879            fpscr.idc = 1;
2880        destReg = fpRecipEstimate(fpscr, srcReg1);
2881        FpscrExc = fpscr;
2882    '''
2883    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2884    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2885
        # VREV16/32/64: the three snippets differ only in the group size in
        # bytes (1 << 1, 1 << 2, 1 << 3). Each copies the element unchanged
        # and sets j = i ^ (groupSize - 1), where groupSize is the number of
        # elements per group; XOR-ing with groupSize - 1 mirrors an index
        # inside its group.
        # NOTE(review): i and j are supplied by twoRegMiscInst, presumably as
        # the source and destination element indices -- confirm in the helper.
2886    vrev16Code = '''
2887        destElem = srcElem1;
2888        unsigned groupSize = ((1 << 1) / sizeof(Element));
2889        unsigned reverseMask = (groupSize - 1);
2890        j = i ^ reverseMask;
2891    '''
2892    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2893    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
        # 4-byte groups: instantiated for 1- and 2-byte elements.
2894    vrev32Code = '''
2895        destElem = srcElem1;
2896        unsigned groupSize = ((1 << 2) / sizeof(Element));
2897        unsigned reverseMask = (groupSize - 1);
2898        j = i ^ reverseMask;
2899    '''
2900    twoRegMiscInst("vrev32", "NVrev32D",
2901            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2902    twoRegMiscInst("vrev32", "NVrev32Q",
2903            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
        # 8-byte groups: instantiated for smallUnsignedTypes.
2904    vrev64Code = '''
2905        destElem = srcElem1;
2906        unsigned groupSize = ((1 << 3) / sizeof(Element));
2907        unsigned reverseMask = (groupSize - 1);
2908        j = i ^ reverseMask;
2909    '''
2910    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2911    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2912
        # VPADDL: adds two source elements after casting both to BigElement,
        # so the sum is formed at the wider width.
        # NOTE(review): srcElem1/srcElem2 are presumably adjacent elements of
        # the single source register, paired by twoRegCondenseInst -- confirm.
2913    vpaddlCode = '''
2914        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2915    '''
2916    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2917    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2918
        # VPADAL: like vpaddl, but the BigElement-width pair sum is added to
        # the existing destElem. The trailing True accompanies the read of
        # destElem (presumably a read-destination flag -- confirm in
        # twoRegCondenseInst).
2919    vpadalCode = '''
2920        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2921    '''
2922    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2923    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2924
        # VCLS: counts how many bits directly below the top bit equal it.
        # The top bit is shifted out first, then the value is shifted left
        # one bit at a time while its sign still matches the original sign.
        # The count is capped at width - 1. Relies on signed elements: the
        # sign test (< 0 / >= 0) is the top-bit test.
2925    vclsCode = '''
2926        unsigned count = 0;
2927        if (srcElem1 < 0) {
2928            srcElem1 <<= 1;
2929            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2930                count++;
2931                srcElem1 <<= 1;
2932            }
2933        } else {
2934            srcElem1 <<= 1;
2935            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2936                count++;
2937                srcElem1 <<= 1;
2938            }
2939        }
2940        destElem = count;
2941    '''
2942    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2943    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2944
        # VCLZ: counts leading zero bits by shifting left until the value
        # turns negative (top bit set) or the full width has been counted.
        # Instantiated with signedTypes so that ">= 0" acts as the
        # "top bit clear" test.
2945    vclzCode = '''
2946        unsigned count = 0;
2947        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2948            count++;
2949            srcElem1 <<= 1;
2950        }
2951        destElem = count;
2952    '''
2953    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2954    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2955
        # VCNT: population count. Adds the low bit to the count and shifts
        # right until the value is zero (or the width is exhausted).
        # Instantiated with unsignedTypes, so the right shift brings in zeros.
2956    vcntCode = '''
2957        unsigned count = 0;
2958        while (srcElem1 && count < sizeof(Element) * 8) {
2959            count += srcElem1 & 0x1;
2960            srcElem1 >>= 1;
2961        }
2962        destElem = count;
2963    '''
2964
2965    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2966    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2967
        # VMVN: bitwise NOT of each element. Only uint64_t is instantiated;
        # the operation is bitwise, so the element width does not affect the
        # result.
2968    vmvnCode = '''
2969        destElem = ~srcElem1;
2970    '''
2971    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2972    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2973
        # VQABS: saturating absolute value. The one input that cannot be
        # negated, 1 << (width - 1), is replaced by its bitwise complement
        # (all bits set except the top one) and sets fpscr.qc; other negative
        # inputs are negated and non-negative inputs are copied.
2974    vqabsCode = '''
2975        FPSCR fpscr = (FPSCR) FpscrQc;
2976        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2977            fpscr.qc = 1;
2978            destElem = ~srcElem1;
2979        } else if (srcElem1 < 0) {
2980            destElem = -srcElem1;
2981        } else {
2982            destElem = srcElem1;
2983        }
2984        FpscrQc = fpscr;
2985    '''
2986    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2987    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2988
        # VQNEG: saturating negate. The one input that cannot be negated,
        # 1 << (width - 1), is replaced by its bitwise complement and sets
        # fpscr.qc; every other input is simply negated.
2989    vqnegCode = '''
2990        FPSCR fpscr = (FPSCR) FpscrQc;
2991        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2992            fpscr.qc = 1;
2993            destElem = ~srcElem1;
2994        } else {
2995            destElem = -srcElem1;
2996        }
2997        FpscrQc = fpscr;
2998    '''
2999    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3000    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3001
        # VABS (integer): negates negative elements and copies the rest. No
        # saturation and no FPSCR access, unlike vqabsCode.
3002    vabsCode = '''
3003        if (srcElem1 < 0) {
3004            destElem = -srcElem1;
3005        } else {
3006            destElem = srcElem1;
3007        }
3008    '''
3009
3010    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3011    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
        # VABS (float): works on the bit pattern rather than on the value.
        # The float is viewed as a uint32_t through a union and AND-ed with
        # mask(width - 1), which clears the top bit (assuming mask(n) yields
        # the low n bits set). No FPSCR state is read or written.
3012    vabsfpCode = '''
3013        union
3014        {
3015            uint32_t i;
3016            float f;
3017        } cStruct;
3018        cStruct.f = srcReg1;
3019        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3020        destReg = cStruct.f;
3021    '''
3022    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3023    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3024
        # VNEG (integer): plain negation of each signed element, with no
        # saturation (compare vqnegCode).
3025    vnegCode = '''
3026        destElem = -srcElem1;
3027    '''
3028    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3029    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
        # VNEG (float): uses the C++ unary minus directly; no FPSCR state is
        # read or written.
3030    vnegfpCode = '''
3031        destReg = -srcReg1;
3032    '''
3033    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3034    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3035
3036    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
        # Integer compare against zero: the element becomes
        # mask(sizeof(Element) * 8) when srcElem1 > 0 and 0 otherwise.
3037    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3038    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3039    vcgtfpCode = '''
3040        FPSCR fpscr = (FPSCR) FpscrExc;
3041        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3042                             true, true, VfpRoundNearest);
3043        destReg = (res == 0) ? -1 : 0;
3044        if (res == 2.0)
3045            fpscr.ioc = 1;
3046        FpscrExc = fpscr;
3047    '''
        # Floating-point form: vcgtFunc is evaluated on (srcReg1, 0.0) through
        # binaryOp.  destReg is -1 when the result equals 0 and 0 otherwise; a
        # result of 2.0 additionally sets fpscr.ioc.  The FPSCR copy read from
        # FpscrExc is written back at the end.
        # NOTE(review): the meaning of the 0 / 2.0 return codes is set by
        # vcgtFunc, which is defined outside this view -- confirm there.
3048    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3049            2, vcgtfpCode, toInt = True)
3050    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3051            4, vcgtfpCode, toInt = True)
3052
3053    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
        # Integer compare against zero: the element becomes
        # mask(sizeof(Element) * 8) when srcElem1 >= 0 and 0 otherwise.
3054    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3055    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3056    vcgefpCode = '''
3057        FPSCR fpscr = (FPSCR) FpscrExc;
3058        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3059                             true, true, VfpRoundNearest);
3060        destReg = (res == 0) ? -1 : 0;
3061        if (res == 2.0)
3062            fpscr.ioc = 1;
3063        FpscrExc = fpscr;
3064    '''
        # Floating-point form: vcgeFunc is evaluated on (srcReg1, 0.0) through
        # binaryOp.  destReg is -1 when the result equals 0 and 0 otherwise; a
        # result of 2.0 additionally sets fpscr.ioc.  The FPSCR copy read from
        # FpscrExc is written back at the end.
        # NOTE(review): the meaning of the 0 / 2.0 return codes is set by
        # vcgeFunc, which is defined outside this view -- confirm there.
3065    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3066            2, vcgefpCode, toInt = True)
3067    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3068            4, vcgefpCode, toInt = True)
3069
3070    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
        # Integer compare against zero: the element becomes
        # mask(sizeof(Element) * 8) when srcElem1 == 0 and 0 otherwise.
3071    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3072    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3073    vceqfpCode = '''
3074        FPSCR fpscr = (FPSCR) FpscrExc;
3075        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3076                             true, true, VfpRoundNearest);
3077        destReg = (res == 0) ? -1 : 0;
3078        if (res == 2.0)
3079            fpscr.ioc = 1;
3080        FpscrExc = fpscr;
3081    '''
        # Floating-point form: vceqFunc is evaluated on (srcReg1, 0.0) through
        # binaryOp.  destReg is -1 when the result equals 0 and 0 otherwise; a
        # result of 2.0 additionally sets fpscr.ioc.  The FPSCR copy read from
        # FpscrExc is written back at the end.
        # NOTE(review): the meaning of the 0 / 2.0 return codes is set by
        # vceqFunc, which is defined outside this view -- confirm there.
3082    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3083            2, vceqfpCode, toInt = True)
3084    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3085            4, vceqfpCode, toInt = True)
3086
3087    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
        # Integer compare against zero: the element becomes
        # mask(sizeof(Element) * 8) when srcElem1 <= 0 and 0 otherwise.
3088    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3089    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3090    vclefpCode = '''
3091        FPSCR fpscr = (FPSCR) FpscrExc;
3092        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3093                             true, true, VfpRoundNearest);
3094        destReg = (res == 0) ? -1 : 0;
3095        if (res == 2.0)
3096            fpscr.ioc = 1;
3097        FpscrExc = fpscr;
3098    '''
        # Floating-point form: vcleFunc is evaluated on (srcReg1, 0.0) through
        # binaryOp.  destReg is -1 when the result equals 0 and 0 otherwise; a
        # result of 2.0 additionally sets fpscr.ioc.  The FPSCR copy read from
        # FpscrExc is written back at the end.
        # NOTE(review): the meaning of the 0 / 2.0 return codes is set by
        # vcleFunc, which is defined outside this view -- confirm there.
3099    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3100            2, vclefpCode, toInt = True)
3101    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3102            4, vclefpCode, toInt = True)
3103
3104    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
        # Integer compare against zero: the element becomes
        # mask(sizeof(Element) * 8) when srcElem1 < 0 and 0 otherwise.
3105    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3106    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3107    vcltfpCode = '''
3108        FPSCR fpscr = (FPSCR) FpscrExc;
3109        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3110                             true, true, VfpRoundNearest);
3111        destReg = (res == 0) ? -1 : 0;
3112        if (res == 2.0)
3113            fpscr.ioc = 1;
3114        FpscrExc = fpscr;
3115    '''
        # Floating-point form: vcltFunc is evaluated on (srcReg1, 0.0) through
        # binaryOp.  destReg is -1 when the result equals 0 and 0 otherwise; a
        # result of 2.0 additionally sets fpscr.ioc.  The FPSCR copy read from
        # FpscrExc is written back at the end.
        # NOTE(review): the meaning of the 0 / 2.0 return codes is set by
        # vcltFunc, which is defined outside this view -- confirm there.
3116    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3117            2, vcltfpCode, toInt = True)
3118    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3119            4, vcltfpCode, toInt = True)
3120
3121    vswpCode = '''
3122        FloatRegBits mid;
3123        for (unsigned r = 0; r < rCount; r++) {
3124            mid = srcReg1.regs[r];
3125            srcReg1.regs[r] = destReg.regs[r];
3126            destReg.regs[r] = mid;
3127        }
3128    '''
        # vswpCode exchanges srcReg1.regs[r] and destReg.regs[r] for every
        # r < rCount, using `mid` as the temporary.  Both operands are
        # modified, which is presumably why the twoRegMiscScramble builder is
        # used -- confirm there; it is defined outside this view.
3129    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3130    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3131
3132    vtrnCode = '''
3133        Element mid;
3134        for (unsigned i = 0; i < eCount; i += 2) {
3135            mid = srcReg1.elements[i];
3136            srcReg1.elements[i] = destReg.elements[i + 1];
3137            destReg.elements[i + 1] = mid;
3138        }
3139    '''
        # vtrnCode walks the even indices i below eCount and exchanges
        # srcReg1.elements[i] with destReg.elements[i + 1].  The remaining
        # elements of both registers are left untouched.
3140    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3141    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3142
3143    vuzpCode = '''
3144        Element mid[eCount];
3145        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3146        for (unsigned i = 0; i < eCount / 2; i++) {
3147            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3148            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3149            destReg.elements[i] = destReg.elements[2 * i];
3150        }
3151        for (unsigned i = 0; i < eCount / 2; i++) {
3152            destReg.elements[eCount / 2 + i] = mid[2 * i];
3153        }
3154    '''
        # vuzpCode de-interleaves the two registers.  `mid` holds a copy of the
        # original srcReg1.  After the snippet, destReg holds the
        # even-indexed elements (old destReg in its lower half, old srcReg1 in
        # its upper half) and srcReg1 holds the odd-indexed elements in the
        # same order.
        # The upper half of destReg is filled in a second loop because the
        # first loop still reads destReg.elements[2 * i] and [2 * i + 1] from
        # that half; statement order matters here.
3155    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3156    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3157
3158    vzipCode = '''
3159        Element mid[eCount];
3160        memcpy(&mid, &destReg, sizeof(destReg));
3161        for (unsigned i = 0; i < eCount / 2; i++) {
3162            destReg.elements[2 * i] = mid[i];
3163            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3164        }
3165        for (int i = 0; i < eCount / 2; i++) {
3166            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3167            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3168        }
3169    '''
3170    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3171    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3172
3173    vmovnCode = 'destElem = srcElem1;'
        # Plain element copy.  Any narrowing is presumably performed by the
        # element types chosen inside twoRegNarrowMiscInst (defined outside
        # this view) -- confirm there.
3174    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3175
3176    vdupCode = 'destElem = srcElem1;'
        # Plain element copy.  Which source element is replicated is decided
        # by twoRegMiscScInst, which is defined outside this view.
3177    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3178    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3179
3180    def vdupGprInst(name, Name, opClass, types, rCount):
            # Emit a vdup variant whose source is the operand Op1 rather than
            # a vector register.
            #   name    -- mnemonic passed to InstObjParams
            #   Name    -- generated class name
            #   opClass -- value substituted as "op_class"
            #   types   -- element types; one NeonExecDeclare per entry
            #   rCount  -- number of destination register pieces written back
            # Appends to the global header_output and exec_output strings.
3181        global header_output, exec_output
            # Fill every element of the local vector with Op1 cast to Element.
3182        eWalkCode = '''
3183        RegVect destReg;
3184        for (unsigned i = 0; i < eCount; i++) {
3185            destReg.elements[i] = htog((Element)Op1);
3186        }
3187        '''
            # Write each register piece of the local vector to FpDestP<reg>_uw.
3188        for reg in range(rCount):
3189            eWalkCode += '''
3190            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3191            ''' % { "reg" : reg }
3192        iop = InstObjParams(name, Name,
3193                            "RegRegOp",
3194                            { "code": eWalkCode,
3195                              "r_count": rCount,
3196                              "predicate_test": predicateTest,
3197                              "op_class": opClass }, [])
3198        header_output += NeonRegRegOpDeclare.subst(iop)
3199        exec_output += NeonEqualRegExecute.subst(iop)
            # One NeonExecDeclare substitution per requested element type.
3200        for type in types:
3201            substDict = { "targs" : type,
3202                          "class_name" : Name }
3203            exec_output += NeonExecDeclare.subst(substDict)
3204    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
        # Same instruction with rCount = 4 instead of 2.
3205    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3206
3207    vmovCode = 'destElem = imm;'
        # The destination element is overwritten with the immediate.
3208    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3209    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3210
3211    vorrCode = 'destElem |= imm;'
        # The immediate is ORed into the existing destination element.
        # NOTE(review): the trailing True is presumably a "read the
        # destination first" flag, since the snippet uses a compound
        # assignment -- confirm in oneRegImmInst (defined outside this view).
3212    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3213    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3214
3215    vmvnCode = 'destElem = ~imm;'
        # The destination element is overwritten with the bitwise complement
        # of the immediate.
3216    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3217    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3218
3219    vbicCode = 'destElem &= ~imm;'
        # The bits set in the immediate are cleared in the existing
        # destination element.
        # NOTE(review): the trailing True is presumably a "read the
        # destination first" flag, since the snippet uses a compound
        # assignment -- confirm in oneRegImmInst (defined outside this view).
3220    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3221    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3222
3223    vqmovnCode = '''
3224    FPSCR fpscr = (FPSCR) FpscrQc;
3225    destElem = srcElem1;
3226    if ((BigElement)destElem != srcElem1) {
3227        fpscr.qc = 1;
3228        destElem = mask(sizeof(Element) * 8 - 1);
3229        if (srcElem1 < 0)
3230            destElem = ~destElem;
3231    }
3232    FpscrQc = fpscr;
3233    '''
        # vqmovnCode assigns srcElem1 to destElem and checks whether widening
        # the result back to BigElement reproduces srcElem1.  If it does not,
        # the snippet sets fpscr.qc and replaces destElem with
        # mask(sizeof(Element) * 8 - 1), bitwise inverted when srcElem1 is
        # negative.  The FPSCR copy read from FpscrQc is written back at the
        # end.  Instantiated over smallSignedTypes.
3234    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3235
3236    vqmovunCode = '''
3237    FPSCR fpscr = (FPSCR) FpscrQc;
3238    destElem = srcElem1;
3239    if ((BigElement)destElem != srcElem1) {
3240        fpscr.qc = 1;
3241        destElem = mask(sizeof(Element) * 8);
3242    }
3243    FpscrQc = fpscr;
3244    '''
        # vqmovunCode assigns srcElem1 to destElem and checks whether widening
        # the result back to BigElement reproduces srcElem1.  If it does not,
        # the snippet sets fpscr.qc and replaces destElem with
        # mask(sizeof(Element) * 8).  Instantiated over smallUnsignedTypes;
        # the signed-source variant of the same mnemonic is NVqmovuns.
3245    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3246            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3247
3248    vqmovunsCode = '''
3249    FPSCR fpscr = (FPSCR) FpscrQc;
3250    destElem = srcElem1;
3251    if (srcElem1 < 0 ||
3252            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3253        fpscr.qc = 1;
3254        destElem = mask(sizeof(Element) * 8);
3255        if (srcElem1 < 0)
3256            destElem = ~destElem;
3257    }
3258    FpscrQc = fpscr;
3259    '''
        # vqmovunsCode saturates in two cases: srcElem1 is negative, or the
        # low sizeof(Element) * 8 bits of the narrowed value differ from
        # srcElem1.  In either case it sets fpscr.qc and sets destElem to
        # mask(sizeof(Element) * 8), bitwise inverted when srcElem1 is
        # negative.  Registered under the "vqmovun" mnemonic as NVqmovuns and
        # instantiated over smallSignedTypes.
3260    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3261            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3262
3263    def buildVext(name, Name, opClass, types, rCount, op):
            # Emit an instruction with two vector sources and an immediate
            # (base class "RegRegRegImmOp").
            #   name    -- mnemonic passed to InstObjParams
            #   Name    -- generated class name
            #   opClass -- value substituted as "op_class"
            #   types   -- element types; one NeonExecDeclare per entry
            #   rCount  -- number of register pieces loaded and written back
            #   op      -- C++ snippet run between the loads and the write-back
            # Appends to the global header_output and exec_output strings.
3264        global header_output, exec_output
3265        eWalkCode = '''
3266        RegVect srcReg1, srcReg2, destReg;
3267        '''
            # Load both sources piece by piece.  simdEnabledCheckCode is
            # prepended on every iteration, so it appears rCount times in the
            # generated code.
3268        for reg in range(rCount):
3269            eWalkCode += simdEnabledCheckCode + '''
3270                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3271                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3272            ''' % { "reg" : reg }
3273        eWalkCode += op
            # Write each register piece of destReg to FpDestP<reg>_uw.
3274        for reg in range(rCount):
3275            eWalkCode += '''
3276            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3277            ''' % { "reg" : reg }
3278        iop = InstObjParams(name, Name,
3279                            "RegRegRegImmOp",
3280                            { "code": eWalkCode,
3281                              "r_count": rCount,
3282                              "predicate_test": predicateTest,
3283                              "op_class": opClass }, [])
3284        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3285        exec_output += NeonEqualRegExecute.subst(iop)
            # One NeonExecDeclare substitution per requested element type.
3286        for type in types:
3287            substDict = { "targs" : type,
3288                          "class_name" : Name }
3289            exec_output += NeonExecDeclare.subst(substDict)
3290
3291    vextCode = '''
3292        for (unsigned i = 0; i < eCount; i++) {
3293            unsigned index = i + imm;
3294            if (index < eCount) {
3295                destReg.elements[i] = srcReg1.elements[index];
3296            } else {
3297                index -= eCount;
3298                if (index >= eCount) {
3299                    if (FullSystem)
3300                        fault = new UndefinedInstruction;
3301                    else
3302                        fault = new UndefinedInstruction(false, mnemonic);
3303                } else {
3304                    destReg.elements[i] = srcReg2.elements[index];
3305                }
3306            }
3307        }
3308    '''
        # vextCode computes index = i + imm for every destination element i.
        # Indices below eCount select from srcReg1.  Larger ones select
        # srcReg2.elements[index - eCount].  If index - eCount is still not
        # below eCount, the snippet sets `fault` to an UndefinedInstruction
        # (constructed differently depending on FullSystem) and leaves that
        # destination element unassigned.
        # Only "uint8_t" is instantiated, so elements are bytes here.
3309    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3310    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3311
3312    def buildVtbxl(name, Name, opClass, length, isVtbl):
            # Emit a byte table-lookup instruction (base class "RegRegRegOp").
            #   name    -- mnemonic passed to InstObjParams
            #   Name    -- generated class name
            #   opClass -- value substituted as "op_class"
            #   length  -- integer substituted into the generated `length`
            #              constant; the first length * 2 table pieces are
            #              loaded from FpOp1P<reg>_uw, the rest are zeroed
            #   isVtbl  -- string substituted verbatim as a C++ bool literal;
            #              when true, out-of-range indices write 0, otherwise
            #              the destination byte keeps its previous value
            # Appends to the global header_output, decoder_output and
            # exec_output strings.
3313        global header_output, decoder_output, exec_output
3314        code = '''
3315            union
3316            {
3317                uint8_t bytes[32];
3318                FloatRegBits regs[8];
3319            } table;
3320
3321            union
3322            {
3323                uint8_t bytes[8];
3324                FloatRegBits regs[2];
3325            } destReg, srcReg2;
3326
3327            const unsigned length = %(length)d;
3328            const bool isVtbl = %(isVtbl)s;
3329
3330            srcReg2.regs[0] = htog(FpOp2P0_uw);
3331            srcReg2.regs[1] = htog(FpOp2P1_uw);
3332
3333            destReg.regs[0] = htog(FpDestP0_uw);
3334            destReg.regs[1] = htog(FpDestP1_uw);
3335        ''' % { "length" : length, "isVtbl" : isVtbl }
            # Populate all eight table pieces so the unused tail is defined.
3336        for reg in range(8):
3337            if reg < length * 2:
3338                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3339                        { "reg" : reg }
3340            else:
3341                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
            # Per destination byte: use the srcReg2 byte as the table index,
            # valid while it is below 8 * length.
3342        code += '''
3343        for (unsigned i = 0; i < sizeof(destReg); i++) {
3344            uint8_t index = srcReg2.bytes[i];
3345            if (index < 8 * length) {
3346                destReg.bytes[i] = table.bytes[index];
3347            } else {
3348                if (isVtbl)
3349                    destReg.bytes[i] = 0;
3350                // else destReg.bytes[i] unchanged
3351            }
3352        }
3353
3354        FpDestP0_uw = gtoh(destReg.regs[0]);
3355        FpDestP1_uw = gtoh(destReg.regs[1]);
3356        '''
3357        iop = InstObjParams(name, Name,
3358                            "RegRegRegOp",
3359                            { "code": code,
3360                              "predicate_test": predicateTest,
3361                              "op_class": opClass }, [])
3362        header_output += RegRegRegOpDeclare.subst(iop)
3363        decoder_output += RegRegRegOpConstructor.subst(iop)
3364        exec_output += PredOpExecute.subst(iop)
3365
3366    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
        # vtbl variants pass "true" for isVtbl (out-of-range indices write 0);
        # the class-name suffix matches the `length` argument, 1 through 4.
3367    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3368    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3369    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3370
        # vtbx variants pass "false" for isVtbl (out-of-range indices leave
        # the destination byte unchanged).
3371    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3372    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3373    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3374    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3375}};
3376