neon.isa revision 8607:5fb918115c07
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        if (imm < 0 && imm >= eCount) {
875#if FULL_SYSTEM
876            fault = new UndefinedInstruction;
877#else
878            fault = new UndefinedInstruction(false, mnemonic);
879#endif
880        } else {
881            for (unsigned i = 0; i < eCount; i++) {
882                Element srcElem1 = gtoh(srcReg1.elements[i]);
883                Element srcElem2 = gtoh(srcReg2.elements[imm]);
884                Element destElem;
885                %(readDest)s
886                %(op)s
887                destReg.elements[i] = htog(destElem);
888            }
889        }
890        ''' % { "op" : op, "readDest" : readDestCode }
891        for reg in range(rCount):
892            eWalkCode += '''
893            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
894            ''' % { "reg" : reg }
895        iop = InstObjParams(name, Name,
896                            "RegRegRegImmOp",
897                            { "code": eWalkCode,
898                              "r_count": rCount,
899                              "predicate_test": predicateTest,
900                              "op_class": opClass }, [])
901        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
902        exec_output += NeonEqualRegExecute.subst(iop)
903        for type in types:
904            substDict = { "targs" : type,
905                          "class_name" : Name }
906            exec_output += NeonExecDeclare.subst(substDict)
907
908    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
909        global header_output, exec_output
910        rCount = 2
911        eWalkCode = simdEnabledCheckCode + '''
912        RegVect srcReg1, srcReg2;
913        BigRegVect destReg;
914        '''
915        for reg in range(rCount):
916            eWalkCode += '''
917                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
918                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
919            ''' % { "reg" : reg }
920        if readDest:
921            for reg in range(2 * rCount):
922                eWalkCode += '''
923                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
924                ''' % { "reg" : reg }
925        readDestCode = ''
926        if readDest:
927            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
928        eWalkCode += '''
929        if (imm < 0 && imm >= eCount) {
930#if FULL_SYSTEM
931            fault = new UndefinedInstruction;
932#else
933            fault = new UndefinedInstruction(false, mnemonic);
934#endif
935        } else {
936            for (unsigned i = 0; i < eCount; i++) {
937                Element srcElem1 = gtoh(srcReg1.elements[i]);
938                Element srcElem2 = gtoh(srcReg2.elements[imm]);
939                BigElement destElem;
940                %(readDest)s
941                %(op)s
942                destReg.elements[i] = htog(destElem);
943            }
944        }
945        ''' % { "op" : op, "readDest" : readDestCode }
946        for reg in range(2 * rCount):
947            eWalkCode += '''
948            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
949            ''' % { "reg" : reg }
950        iop = InstObjParams(name, Name,
951                            "RegRegRegImmOp",
952                            { "code": eWalkCode,
953                              "r_count": rCount,
954                              "predicate_test": predicateTest,
955                              "op_class": opClass }, [])
956        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
957        exec_output += NeonUnequalRegExecute.subst(iop)
958        for type in types:
959            substDict = { "targs" : type,
960                          "class_name" : Name }
961            exec_output += NeonExecDeclare.subst(substDict)
962
963    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
964        global header_output, exec_output
965        eWalkCode = simdEnabledCheckCode + '''
966        typedef FloatReg FloatVect[rCount];
967        FloatVect srcRegs1, srcRegs2, destRegs;
968        '''
969        for reg in range(rCount):
970            eWalkCode += '''
971                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
972                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
973            ''' % { "reg" : reg }
974            if readDest:
975                eWalkCode += '''
976                    destRegs[%(reg)d] = FpDestP%(reg)d;
977                ''' % { "reg" : reg }
978        readDestCode = ''
979        if readDest:
980            readDestCode = 'destReg = destRegs[i];'
981        eWalkCode += '''
982        if (imm < 0 && imm >= eCount) {
983#if FULL_SYSTEM
984            fault = new UndefinedInstruction;
985#else
986            fault = new UndefinedInstruction(false, mnemonic);
987#endif
988        } else {
989            for (unsigned i = 0; i < rCount; i++) {
990                FloatReg srcReg1 = srcRegs1[i];
991                FloatReg srcReg2 = srcRegs2[imm];
992                FloatReg destReg;
993                %(readDest)s
994                %(op)s
995                destRegs[i] = destReg;
996            }
997        }
998        ''' % { "op" : op, "readDest" : readDestCode }
999        for reg in range(rCount):
1000            eWalkCode += '''
1001            FpDestP%(reg)d = destRegs[%(reg)d];
1002            ''' % { "reg" : reg }
1003        iop = InstObjParams(name, Name,
1004                            "FpRegRegRegImmOp",
1005                            { "code": eWalkCode,
1006                              "r_count": rCount,
1007                              "predicate_test": predicateTest,
1008                              "op_class": opClass }, [])
1009        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1010        exec_output += NeonEqualRegExecute.subst(iop)
1011        for type in types:
1012            substDict = { "targs" : type,
1013                          "class_name" : Name }
1014            exec_output += NeonExecDeclare.subst(substDict)
1015
1016    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1017            readDest=False, toInt=False, fromInt=False):
1018        global header_output, exec_output
1019        eWalkCode = simdEnabledCheckCode + '''
1020        RegVect srcRegs1, destRegs;
1021        '''
1022        for reg in range(rCount):
1023            eWalkCode += '''
1024                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1025            ''' % { "reg" : reg }
1026            if readDest:
1027                eWalkCode += '''
1028                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1029                ''' % { "reg" : reg }
1030        readDestCode = ''
1031        if readDest:
1032            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1033            if toInt:
1034                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1035        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1036        if fromInt:
1037            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1038        declDest = 'Element destElem;'
1039        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1040        if toInt:
1041            declDest = 'FloatRegBits destReg;'
1042            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1043        eWalkCode += '''
1044        for (unsigned i = 0; i < eCount; i++) {
1045            %(readOp)s
1046            %(declDest)s
1047            %(readDest)s
1048            %(op)s
1049            %(writeDest)s
1050        }
1051        ''' % { "readOp" : readOpCode,
1052                "declDest" : declDest,
1053                "readDest" : readDestCode,
1054                "op" : op,
1055                "writeDest" : writeDestCode }
1056        for reg in range(rCount):
1057            eWalkCode += '''
1058            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1059            ''' % { "reg" : reg }
1060        iop = InstObjParams(name, Name,
1061                            "RegRegImmOp",
1062                            { "code": eWalkCode,
1063                              "r_count": rCount,
1064                              "predicate_test": predicateTest,
1065                              "op_class": opClass }, [])
1066        header_output += NeonRegRegImmOpDeclare.subst(iop)
1067        exec_output += NeonEqualRegExecute.subst(iop)
1068        for type in types:
1069            substDict = { "targs" : type,
1070                          "class_name" : Name }
1071            exec_output += NeonExecDeclare.subst(substDict)
1072
1073    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1074        global header_output, exec_output
1075        eWalkCode = simdEnabledCheckCode + '''
1076        BigRegVect srcReg1;
1077        RegVect destReg;
1078        '''
1079        for reg in range(4):
1080            eWalkCode += '''
1081                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1082            ''' % { "reg" : reg }
1083        if readDest:
1084            for reg in range(2):
1085                eWalkCode += '''
1086                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1087                ''' % { "reg" : reg }
1088        readDestCode = ''
1089        if readDest:
1090            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1091        eWalkCode += '''
1092        for (unsigned i = 0; i < eCount; i++) {
1093            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1094            Element destElem;
1095            %(readDest)s
1096            %(op)s
1097            destReg.elements[i] = htog(destElem);
1098        }
1099        ''' % { "op" : op, "readDest" : readDestCode }
1100        for reg in range(2):
1101            eWalkCode += '''
1102            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1103            ''' % { "reg" : reg }
1104        iop = InstObjParams(name, Name,
1105                            "RegRegImmOp",
1106                            { "code": eWalkCode,
1107                              "r_count": 2,
1108                              "predicate_test": predicateTest,
1109                              "op_class": opClass }, [])
1110        header_output += NeonRegRegImmOpDeclare.subst(iop)
1111        exec_output += NeonUnequalRegExecute.subst(iop)
1112        for type in types:
1113            substDict = { "targs" : type,
1114                          "class_name" : Name }
1115            exec_output += NeonExecDeclare.subst(substDict)
1116
1117    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1118        global header_output, exec_output
1119        eWalkCode = simdEnabledCheckCode + '''
1120        RegVect srcReg1;
1121        BigRegVect destReg;
1122        '''
1123        for reg in range(2):
1124            eWalkCode += '''
1125                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1126            ''' % { "reg" : reg }
1127        if readDest:
1128            for reg in range(4):
1129                eWalkCode += '''
1130                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1131                ''' % { "reg" : reg }
1132        readDestCode = ''
1133        if readDest:
1134            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1135        eWalkCode += '''
1136        for (unsigned i = 0; i < eCount; i++) {
1137            Element srcElem1 = gtoh(srcReg1.elements[i]);
1138            BigElement destElem;
1139            %(readDest)s
1140            %(op)s
1141            destReg.elements[i] = htog(destElem);
1142        }
1143        ''' % { "op" : op, "readDest" : readDestCode }
1144        for reg in range(4):
1145            eWalkCode += '''
1146            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1147            ''' % { "reg" : reg }
1148        iop = InstObjParams(name, Name,
1149                            "RegRegImmOp",
1150                            { "code": eWalkCode,
1151                              "r_count": 2,
1152                              "predicate_test": predicateTest,
1153                              "op_class": opClass }, [])
1154        header_output += NeonRegRegImmOpDeclare.subst(iop)
1155        exec_output += NeonUnequalRegExecute.subst(iop)
1156        for type in types:
1157            substDict = { "targs" : type,
1158                          "class_name" : Name }
1159            exec_output += NeonExecDeclare.subst(substDict)
1160
1161    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1162        global header_output, exec_output
1163        eWalkCode = simdEnabledCheckCode + '''
1164        RegVect srcReg1, destReg;
1165        '''
1166        for reg in range(rCount):
1167            eWalkCode += '''
1168                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1169            ''' % { "reg" : reg }
1170            if readDest:
1171                eWalkCode += '''
1172                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1173                ''' % { "reg" : reg }
1174        readDestCode = ''
1175        if readDest:
1176            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1177        eWalkCode += '''
1178        for (unsigned i = 0; i < eCount; i++) {
1179            unsigned j = i;
1180            Element srcElem1 = gtoh(srcReg1.elements[i]);
1181            Element destElem;
1182            %(readDest)s
1183            %(op)s
1184            destReg.elements[j] = htog(destElem);
1185        }
1186        ''' % { "op" : op, "readDest" : readDestCode }
1187        for reg in range(rCount):
1188            eWalkCode += '''
1189            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1190            ''' % { "reg" : reg }
1191        iop = InstObjParams(name, Name,
1192                            "RegRegOp",
1193                            { "code": eWalkCode,
1194                              "r_count": rCount,
1195                              "predicate_test": predicateTest,
1196                              "op_class": opClass }, [])
1197        header_output += NeonRegRegOpDeclare.subst(iop)
1198        exec_output += NeonEqualRegExecute.subst(iop)
1199        for type in types:
1200            substDict = { "targs" : type,
1201                          "class_name" : Name }
1202            exec_output += NeonExecDeclare.subst(substDict)
1203
1204    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1205        global header_output, exec_output
1206        eWalkCode = simdEnabledCheckCode + '''
1207        RegVect srcReg1, destReg;
1208        '''
1209        for reg in range(rCount):
1210            eWalkCode += '''
1211                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1212            ''' % { "reg" : reg }
1213            if readDest:
1214                eWalkCode += '''
1215                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1216                ''' % { "reg" : reg }
1217        readDestCode = ''
1218        if readDest:
1219            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1220        eWalkCode += '''
1221        for (unsigned i = 0; i < eCount; i++) {
1222            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1223            Element destElem;
1224            %(readDest)s
1225            %(op)s
1226            destReg.elements[i] = htog(destElem);
1227        }
1228        ''' % { "op" : op, "readDest" : readDestCode }
1229        for reg in range(rCount):
1230            eWalkCode += '''
1231            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1232            ''' % { "reg" : reg }
1233        iop = InstObjParams(name, Name,
1234                            "RegRegImmOp",
1235                            { "code": eWalkCode,
1236                              "r_count": rCount,
1237                              "predicate_test": predicateTest,
1238                              "op_class": opClass }, [])
1239        header_output += NeonRegRegImmOpDeclare.subst(iop)
1240        exec_output += NeonEqualRegExecute.subst(iop)
1241        for type in types:
1242            substDict = { "targs" : type,
1243                          "class_name" : Name }
1244            exec_output += NeonExecDeclare.subst(substDict)
1245
1246    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1247        global header_output, exec_output
1248        eWalkCode = simdEnabledCheckCode + '''
1249        RegVect srcReg1, destReg;
1250        '''
1251        for reg in range(rCount):
1252            eWalkCode += '''
1253                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1254                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1255            ''' % { "reg" : reg }
1256            if readDest:
1257                eWalkCode += '''
1258                ''' % { "reg" : reg }
1259        readDestCode = ''
1260        if readDest:
1261            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1262        eWalkCode += op
1263        for reg in range(rCount):
1264            eWalkCode += '''
1265            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1266            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1267            ''' % { "reg" : reg }
1268        iop = InstObjParams(name, Name,
1269                            "RegRegOp",
1270                            { "code": eWalkCode,
1271                              "r_count": rCount,
1272                              "predicate_test": predicateTest,
1273                              "op_class": opClass }, [])
1274        header_output += NeonRegRegOpDeclare.subst(iop)
1275        exec_output += NeonEqualRegExecute.subst(iop)
1276        for type in types:
1277            substDict = { "targs" : type,
1278                          "class_name" : Name }
1279            exec_output += NeonExecDeclare.subst(substDict)
1280
1281    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1282            readDest=False, toInt=False):
1283        global header_output, exec_output
1284        eWalkCode = simdEnabledCheckCode + '''
1285        typedef FloatReg FloatVect[rCount];
1286        FloatVect srcRegs1;
1287        '''
1288        if toInt:
1289            eWalkCode += 'RegVect destRegs;\n'
1290        else:
1291            eWalkCode += 'FloatVect destRegs;\n'
1292        for reg in range(rCount):
1293            eWalkCode += '''
1294                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1295            ''' % { "reg" : reg }
1296            if readDest:
1297                if toInt:
1298                    eWalkCode += '''
1299                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1300                    ''' % { "reg" : reg }
1301                else:
1302                    eWalkCode += '''
1303                        destRegs[%(reg)d] = FpDestP%(reg)d;
1304                    ''' % { "reg" : reg }
1305        readDestCode = ''
1306        if readDest:
1307            readDestCode = 'destReg = destRegs[i];'
1308        destType = 'FloatReg'
1309        writeDest = 'destRegs[r] = destReg;'
1310        if toInt:
1311            destType = 'FloatRegBits'
1312            writeDest = 'destRegs.regs[r] = destReg;'
1313        eWalkCode += '''
1314        for (unsigned r = 0; r < rCount; r++) {
1315            FloatReg srcReg1 = srcRegs1[r];
1316            %(destType)s destReg;
1317            %(readDest)s
1318            %(op)s
1319            %(writeDest)s
1320        }
1321        ''' % { "op" : op,
1322                "readDest" : readDestCode,
1323                "destType" : destType,
1324                "writeDest" : writeDest }
1325        for reg in range(rCount):
1326            if toInt:
1327                eWalkCode += '''
1328                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1329                ''' % { "reg" : reg }
1330            else:
1331                eWalkCode += '''
1332                FpDestP%(reg)d = destRegs[%(reg)d];
1333                ''' % { "reg" : reg }
1334        iop = InstObjParams(name, Name,
1335                            "FpRegRegOp",
1336                            { "code": eWalkCode,
1337                              "r_count": rCount,
1338                              "predicate_test": predicateTest,
1339                              "op_class": opClass }, [])
1340        header_output += NeonRegRegOpDeclare.subst(iop)
1341        exec_output += NeonEqualRegExecute.subst(iop)
1342        for type in types:
1343            substDict = { "targs" : type,
1344                          "class_name" : Name }
1345            exec_output += NeonExecDeclare.subst(substDict)
1346
1347    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1348        global header_output, exec_output
1349        eWalkCode = simdEnabledCheckCode + '''
1350        RegVect srcRegs;
1351        BigRegVect destReg;
1352        '''
1353        for reg in range(rCount):
1354            eWalkCode += '''
1355                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1356            ''' % { "reg" : reg }
1357            if readDest:
1358                eWalkCode += '''
1359                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1360                ''' % { "reg" : reg }
1361        readDestCode = ''
1362        if readDest:
1363            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1364        eWalkCode += '''
1365        for (unsigned i = 0; i < eCount / 2; i++) {
1366            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1367            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1368            BigElement destElem;
1369            %(readDest)s
1370            %(op)s
1371            destReg.elements[i] = htog(destElem);
1372        }
1373        ''' % { "op" : op, "readDest" : readDestCode }
1374        for reg in range(rCount):
1375            eWalkCode += '''
1376            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1377            ''' % { "reg" : reg }
1378        iop = InstObjParams(name, Name,
1379                            "RegRegOp",
1380                            { "code": eWalkCode,
1381                              "r_count": rCount,
1382                              "predicate_test": predicateTest,
1383                              "op_class": opClass }, [])
1384        header_output += NeonRegRegOpDeclare.subst(iop)
1385        exec_output += NeonUnequalRegExecute.subst(iop)
1386        for type in types:
1387            substDict = { "targs" : type,
1388                          "class_name" : Name }
1389            exec_output += NeonExecDeclare.subst(substDict)
1390
1391    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1392        global header_output, exec_output
1393        eWalkCode = simdEnabledCheckCode + '''
1394        BigRegVect srcReg1;
1395        RegVect destReg;
1396        '''
1397        for reg in range(4):
1398            eWalkCode += '''
1399                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1400            ''' % { "reg" : reg }
1401        if readDest:
1402            for reg in range(2):
1403                eWalkCode += '''
1404                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1405                ''' % { "reg" : reg }
1406        readDestCode = ''
1407        if readDest:
1408            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1409        eWalkCode += '''
1410        for (unsigned i = 0; i < eCount; i++) {
1411            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1412            Element destElem;
1413            %(readDest)s
1414            %(op)s
1415            destReg.elements[i] = htog(destElem);
1416        }
1417        ''' % { "op" : op, "readDest" : readDestCode }
1418        for reg in range(2):
1419            eWalkCode += '''
1420            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1421            ''' % { "reg" : reg }
1422        iop = InstObjParams(name, Name,
1423                            "RegRegOp",
1424                            { "code": eWalkCode,
1425                              "r_count": 2,
1426                              "predicate_test": predicateTest,
1427                              "op_class": opClass }, [])
1428        header_output += NeonRegRegOpDeclare.subst(iop)
1429        exec_output += NeonUnequalRegExecute.subst(iop)
1430        for type in types:
1431            substDict = { "targs" : type,
1432                          "class_name" : Name }
1433            exec_output += NeonExecDeclare.subst(substDict)
1434
1435    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1436        global header_output, exec_output
1437        eWalkCode = simdEnabledCheckCode + '''
1438        RegVect destReg;
1439        '''
1440        if readDest:
1441            for reg in range(rCount):
1442                eWalkCode += '''
1443                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1444                ''' % { "reg" : reg }
1445        readDestCode = ''
1446        if readDest:
1447            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1448        eWalkCode += '''
1449        for (unsigned i = 0; i < eCount; i++) {
1450            Element destElem;
1451            %(readDest)s
1452            %(op)s
1453            destReg.elements[i] = htog(destElem);
1454        }
1455        ''' % { "op" : op, "readDest" : readDestCode }
1456        for reg in range(rCount):
1457            eWalkCode += '''
1458            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1459            ''' % { "reg" : reg }
1460        iop = InstObjParams(name, Name,
1461                            "RegImmOp",
1462                            { "code": eWalkCode,
1463                              "r_count": rCount,
1464                              "predicate_test": predicateTest,
1465                              "op_class": opClass }, [])
1466        header_output += NeonRegImmOpDeclare.subst(iop)
1467        exec_output += NeonEqualRegExecute.subst(iop)
1468        for type in types:
1469            substDict = { "targs" : type,
1470                          "class_name" : Name }
1471            exec_output += NeonExecDeclare.subst(substDict)
1472
1473    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1474        global header_output, exec_output
1475        eWalkCode = simdEnabledCheckCode + '''
1476        RegVect srcReg1;
1477        BigRegVect destReg;
1478        '''
1479        for reg in range(2):
1480            eWalkCode += '''
1481                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1482            ''' % { "reg" : reg }
1483        if readDest:
1484            for reg in range(4):
1485                eWalkCode += '''
1486                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1487                ''' % { "reg" : reg }
1488        readDestCode = ''
1489        if readDest:
1490            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1491        eWalkCode += '''
1492        for (unsigned i = 0; i < eCount; i++) {
1493            Element srcElem1 = gtoh(srcReg1.elements[i]);
1494            BigElement destElem;
1495            %(readDest)s
1496            %(op)s
1497            destReg.elements[i] = htog(destElem);
1498        }
1499        ''' % { "op" : op, "readDest" : readDestCode }
1500        for reg in range(4):
1501            eWalkCode += '''
1502            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1503            ''' % { "reg" : reg }
1504        iop = InstObjParams(name, Name,
1505                            "RegRegOp",
1506                            { "code": eWalkCode,
1507                              "r_count": 2,
1508                              "predicate_test": predicateTest,
1509                              "op_class": opClass }, [])
1510        header_output += NeonRegRegOpDeclare.subst(iop)
1511        exec_output += NeonUnequalRegExecute.subst(iop)
1512        for type in types:
1513            substDict = { "targs" : type,
1514                          "class_name" : Name }
1515            exec_output += NeonExecDeclare.subst(substDict)
1516
1517    vhaddCode = '''
1518        Element carryBit =
1519            (((unsigned)srcElem1 & 0x1) +
1520             ((unsigned)srcElem2 & 0x1)) >> 1;
1521        // Use division instead of a shift to ensure the sign extension works
1522        // right. The compiler will figure out if it can be a shift. Mask the
1523        // inputs so they get truncated correctly.
1524        destElem = (((srcElem1 & ~(Element)1) / 2) +
1525                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1526    '''
1527    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1528    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1529
1530    vrhaddCode = '''
1531        Element carryBit =
1532            (((unsigned)srcElem1 & 0x1) +
1533             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1534        // Use division instead of a shift to ensure the sign extension works
1535        // right. The compiler will figure out if it can be a shift. Mask the
1536        // inputs so they get truncated correctly.
1537        destElem = (((srcElem1 & ~(Element)1) / 2) +
1538                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1539    '''
1540    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1541    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1542
# vhsub: per-element halving subtract. The halves of the two sources (low bit
# cleared, then divided by two) are subtracted, and barrowBit (the borrow)
# is subtracted as well; it is 1 only when srcElem1's low bit is clear and
# srcElem2's low bit is set.
# Registered as VhsubD (2) and VhsubQ (4) over allTypes.
1543    vhsubCode = '''
1544        Element barrowBit =
1545            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1546        // Use division instead of a shift to ensure the sign extension works
1547        // right. The compiler will figure out if it can be a shift. Mask the
1548        // inputs so they get truncated correctly.
1549        destElem = (((srcElem1 & ~(Element)1) / 2) -
1550                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1551    '''
1552    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1553    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1554
# Bitwise logical operations. Each snippet is a single per-element expression
# and each is registered twice (D form with 2, Q form with 4) over
# unsignedTypes.

# vand: bitwise AND of the two sources.
1555    vandCode = '''
1556        destElem = srcElem1 & srcElem2;
1557    '''
1558    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1559    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1560
# vbic: srcElem1 with every bit that is set in srcElem2 cleared.
1561    vbicCode = '''
1562        destElem = srcElem1 & ~srcElem2;
1563    '''
1564    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1565    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1566
# vorr: bitwise OR of the two sources.
1567    vorrCode = '''
1568        destElem = srcElem1 | srcElem2;
1569    '''
1570    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1571    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1572
# vmov reuses vorrCode under the "vmov" mnemonic with the SimdMiscOp class.
# NOTE(review): presumably the decoder only selects these when both source
# operands name the same register, so the OR acts as a copy -- confirm.
1573    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1574    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1575
# vorn: OR of srcElem1 with the complement of srcElem2.
1576    vornCode = '''
1577        destElem = srcElem1 | ~srcElem2;
1578    '''
1579    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1580    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1581
# veor: bitwise exclusive OR of the two sources.
1582    veorCode = '''
1583        destElem = srcElem1 ^ srcElem2;
1584    '''
1585    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1586    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1587
# Bitwise select operations. All three snippets read the previous value of
# destElem; the trailing True argument is presumably the flag that makes the
# generated instruction read its destination -- confirm against
# threeEqualRegInst.

# vbif: keeps the destination bit where srcElem2 has a 1 and takes the
# srcElem1 bit where srcElem2 has a 0.
1588    vbifCode = '''
1589        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1590    '''
1591    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1592    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# vbit: takes the srcElem1 bit where srcElem2 has a 1 and keeps the
# destination bit where srcElem2 has a 0.
1593    vbitCode = '''
1594        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1595    '''
1596    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1597    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# vbsl: the old destination value is the selector; a 1 bit picks the
# srcElem1 bit and a 0 bit picks the srcElem2 bit.
1598    vbslCode = '''
1599        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1600    '''
1601    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1602    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1603
# vmax: per-element maximum; srcElem2 is chosen when the two compare equal.
# Registered as VmaxD (2) and VmaxQ (4) over allTypes. vmaxCode is reused
# further down for the pairwise vpmax registration.
1604    vmaxCode = '''
1605        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1606    '''
1607    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1608    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1609
# vmin: per-element minimum; srcElem2 is chosen when the two compare equal.
# Registered as VminD (2) and VminQ (4) over allTypes. vminCode is reused
# further down for the pairwise vpmin registration.
1610    vminCode = '''
1611        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1612    '''
1613    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1614    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1615
# Integer addition family.

# vadd: plain per-element add, registered as NVaddD (2) and NVaddQ (4) over
# unsignedTypes.
1616    vaddCode = '''
1617        destElem = srcElem1 + srcElem2;
1618    '''
1619    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1620    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
# vpadd: reuses vaddCode with pairwise=True over smallUnsignedTypes; only a
# D-sized (2) form is registered here.
1622    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1623                      2, vaddCode, pairwise=True)
# vaddl / vaddw: both sources are cast to BigElement before the add. The same
# snippet serves the long (threeRegLongInst) and wide (threeRegWideInst)
# generators.
1624    vaddlwCode = '''
1625        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1626    '''
1627    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1628    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# vaddhn: the sum is formed in BigElement and then shifted right by the bit
# width of Element, i.e. the bits above the low Element-sized part are kept.
1629    vaddhnCode = '''
1630        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1631                   (sizeof(Element) * 8);
1632    '''
1633    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# vraddhn: as vaddhn, but 1 << (Element bit width - 1) is added before the
# shift, which rounds the discarded low part instead of truncating it.
1634    vraddhnCode = '''
1635        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1636                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1637                   (sizeof(Element) * 8);
1638    '''
1639    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1640
# vsub: plain per-element subtract (srcElem1 - srcElem2), registered as
# NVsubD (2) and NVsubQ (4) over unsignedTypes.
1641    vsubCode = '''
1642        destElem = srcElem1 - srcElem2;
1643    '''
1644    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1645    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# vsubl / vsubw: both sources are cast to BigElement before the subtract. The
# same snippet serves the long and wide generators.
1646    vsublwCode = '''
1647        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1648    '''
1649    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1650    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1651
# vqadd (unsigned): saturating add. The sum is computed with wrap-around; if
# it ends up smaller than either source the add overflowed, so the result is
# replaced by all ones and the qc bit is set. FpscrQc is read into a local
# FPSCR and written back at the end whether or not saturation occurred.
# Registered as VqaddUD (2) and VqaddUQ (4) over unsignedTypes.
1652    vqaddUCode = '''
1653        destElem = srcElem1 + srcElem2;
1654        FPSCR fpscr = (FPSCR) FpscrQc;
1655        if (destElem < srcElem1 || destElem < srcElem2) {
1656            destElem = (Element)(-1);
1657            fpscr.qc = 1;
1658        }
1659        FpscrQc = fpscr;
1660    '''
1661    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1662    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# vsubhn: the difference is formed in BigElement and then shifted right by
# the bit width of Element, keeping the bits above the low Element-sized part.
1663    vsubhnCode = '''
1664        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1665                   (sizeof(Element) * 8);
1666    '''
1667    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# vrsubhn: as vsubhn, but 1 << (Element bit width - 1) is added before the
# shift so the discarded low part is rounded rather than truncated.
1668    vrsubhnCode = '''
1669        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1670                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1671                   (sizeof(Element) * 8);
1672    '''
1673    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1674
# vqadd (signed): saturating add. Overflow is detected when both sources have
# the same sign and the wrapped sum has the opposite sign. On overflow the
# result becomes the value with only the sign bit set; if the wrapped sum was
# negative (the sources were non-negative) 1 is subtracted, giving the value
# with every bit except the sign bit set. The qc bit is set on overflow and
# FpscrQc is always written back.
# Registered as VqaddSD (2) and VqaddSQ (4) over signedTypes.
1675    vqaddSCode = '''
1676        destElem = srcElem1 + srcElem2;
1677        FPSCR fpscr = (FPSCR) FpscrQc;
1678        bool negDest = (destElem < 0);
1679        bool negSrc1 = (srcElem1 < 0);
1680        bool negSrc2 = (srcElem2 < 0);
1681        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1682            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1683            if (negDest)
1684                destElem -= 1;
1685            fpscr.qc = 1;
1686        }
1687        FpscrQc = fpscr;
1688    '''
1689    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1690    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1691
# vqsub (unsigned): saturating subtract. The difference is computed with
# wrap-around; a result larger than srcElem1 means the subtract borrowed, so
# the result is clamped to 0 and the qc bit is set. FpscrQc is always written
# back.
# Registered as VqsubUD (2) and VqsubUQ (4) over unsignedTypes.
1692    vqsubUCode = '''
1693        destElem = srcElem1 - srcElem2;
1694        FPSCR fpscr = (FPSCR) FpscrQc;
1695        if (destElem > srcElem1) {
1696            destElem = 0;
1697            fpscr.qc = 1;
1698        }
1699        FpscrQc = fpscr;
1700    '''
1701    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1702    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1703
# vqsub (signed): saturating subtract. Overflow is detected when the sources
# have different signs (negSrc1 == posSrc2) and the wrapped difference has a
# different sign from srcElem1. On overflow the result becomes the value with
# only the sign bit set; if the wrapped difference was negative 1 is
# subtracted, giving the value with every bit except the sign bit set. The qc
# bit is set on overflow and FpscrQc is always written back.
# Registered as VqsubSD (2) and VqsubSQ (4) over signedTypes.
1704    vqsubSCode = '''
1705        destElem = srcElem1 - srcElem2;
1706        FPSCR fpscr = (FPSCR) FpscrQc;
1707        bool negDest = (destElem < 0);
1708        bool negSrc1 = (srcElem1 < 0);
1709        bool posSrc2 = (srcElem2 >= 0);
1710        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1711            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1712            if (negDest)
1713                destElem -= 1;
1714            fpscr.qc = 1;
1715        }
1716        FpscrQc = fpscr;
1717    '''
1718    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1719    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1720
# Integer comparisons. Each snippet writes all ones ((Element)(-1)) when the
# comparison holds and 0 otherwise.

# vcgt: srcElem1 > srcElem2, registered as VcgtD (2) / VcgtQ (4) over allTypes.
1721    vcgtCode = '''
1722        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1723    '''
1724    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1725    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1726
# vcge: srcElem1 >= srcElem2, registered as VcgeD (2) / VcgeQ (4) over
# allTypes.
1727    vcgeCode = '''
1728        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1729    '''
1730    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1731    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1732
# vceq: srcElem1 == srcElem2, registered as VceqD (2) / VceqQ (4) over
# unsignedTypes only.
1733    vceqCode = '''
1734        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1735    '''
1736    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1737    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1738
# vshl: shift by a per-element register amount. The amount is the low byte of
# srcElem2 interpreted as a signed 8-bit value.
#  * Negative amount: right shift by its magnitude. A magnitude at or above
#    the element width produces 0 and clamps shiftAmt to width - 1 so the
#    sign-fill step below stays in range. The sign-fill step ORs ones into the
#    high bits whenever srcElem1 is negative (per ltz) but the shifted value
#    is not.
#  * Non-negative amount: left shift, or 0 when the amount is at or above the
#    element width.
# Registered as VshlD (2) and VshlQ (4) over allTypes.
1739    vshlCode = '''
1740        int16_t shiftAmt = (int8_t)srcElem2;
1741        if (shiftAmt < 0) {
1742            shiftAmt = -shiftAmt;
1743            if (shiftAmt >= sizeof(Element) * 8) {
1744                shiftAmt = sizeof(Element) * 8 - 1;
1745                destElem = 0;
1746            } else {
1747                destElem = (srcElem1 >> shiftAmt);
1748            }
1749            // Make sure the right shift sign extended when it should.
1750            if (ltz(srcElem1) && !ltz(destElem)) {
1751                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1752                                             1 - shiftAmt));
1753            }
1754        } else {
1755            if (shiftAmt >= sizeof(Element) * 8) {
1756                destElem = 0;
1757            } else {
1758                destElem = srcElem1 << shiftAmt;
1759            }
1760        }
1761    '''
1762    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1763    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1764
# vrshl: rounding shift by a per-element register amount (low byte of
# srcElem2 as a signed 8-bit value).
#  * Negative amount: right shift by its magnitude as in vshlCode, then rBit
#    is added. rBit is the last bit shifted out (bit shiftAmt - 1 of srcElem1)
#    when the magnitude does not exceed the element width; when the magnitude
#    exceeds the width and srcElem1 is negative rBit is forced to 1, which
#    turns the all-ones value left by the sign-fill step into 0.
#  * Positive amount: left shift, or 0 when at or above the element width.
#  * Zero: srcElem1 is copied unchanged.
# Registered as VrshlD (2) and VrshlQ (4) over allTypes.
# NOTE(review): the op class here is "SimdAluOp" whereas vshl uses
# "SimdShiftOp" -- confirm this difference is intended.
1765    vrshlCode = '''
1766        int16_t shiftAmt = (int8_t)srcElem2;
1767        if (shiftAmt < 0) {
1768            shiftAmt = -shiftAmt;
1769            Element rBit = 0;
1770            if (shiftAmt <= sizeof(Element) * 8)
1771                rBit = bits(srcElem1, shiftAmt - 1);
1772            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1773                rBit = 1;
1774            if (shiftAmt >= sizeof(Element) * 8) {
1775                shiftAmt = sizeof(Element) * 8 - 1;
1776                destElem = 0;
1777            } else {
1778                destElem = (srcElem1 >> shiftAmt);
1779            }
1780            // Make sure the right shift sign extended when it should.
1781            if (ltz(srcElem1) && !ltz(destElem)) {
1782                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1783                                             1 - shiftAmt));
1784            }
1785            destElem += rBit;
1786        } else if (shiftAmt > 0) {
1787            if (shiftAmt >= sizeof(Element) * 8) {
1788                destElem = 0;
1789            } else {
1790                destElem = srcElem1 << shiftAmt;
1791            }
1792        } else {
1793            destElem = srcElem1;
1794        }
1795    '''
1796    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1797    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1798
# vqshl (unsigned): saturating shift by a per-element register amount (low
# byte of srcElem2 as a signed 8-bit value).
#  * Negative amount: plain right shift by its magnitude; 0 when the
#    magnitude is at or above the element width. No saturation on this path.
#  * Positive amount at or above the element width: a non-zero source
#    saturates to mask(width) and sets qc; a zero source stays 0.
#  * Positive amount below the width: if any of the top shiftAmt bits of
#    srcElem1 is set the shift would lose them, so the result saturates to
#    mask(width) and qc is set; otherwise the left shift is performed.
#  * Zero: srcElem1 is copied unchanged.
# FpscrQc is read at the start and always written back.
# Registered as VqshlUD (2) and VqshlUQ (4) over unsignedTypes.
1799    vqshlUCode = '''
1800        int16_t shiftAmt = (int8_t)srcElem2;
1801        FPSCR fpscr = (FPSCR) FpscrQc;
1802        if (shiftAmt < 0) {
1803            shiftAmt = -shiftAmt;
1804            if (shiftAmt >= sizeof(Element) * 8) {
1805                shiftAmt = sizeof(Element) * 8 - 1;
1806                destElem = 0;
1807            } else {
1808                destElem = (srcElem1 >> shiftAmt);
1809            }
1810        } else if (shiftAmt > 0) {
1811            if (shiftAmt >= sizeof(Element) * 8) {
1812                if (srcElem1 != 0) {
1813                    destElem = mask(sizeof(Element) * 8);
1814                    fpscr.qc = 1;
1815                } else {
1816                    destElem = 0;
1817                }
1818            } else {
1819                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1820                            sizeof(Element) * 8 - shiftAmt)) {
1821                    destElem = mask(sizeof(Element) * 8);
1822                    fpscr.qc = 1;
1823                } else {
1824                    destElem = srcElem1 << shiftAmt;
1825                }
1826            }
1827        } else {
1828            destElem = srcElem1;
1829        }
1830        FpscrQc = fpscr;
1831    '''
1832    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1833    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1834
# vqshl (signed): saturating shift by a per-element register amount (low byte
# of srcElem2 as a signed 8-bit value).
#  * Negative amount: right shift by its magnitude with the same clamp and
#    explicit sign-fill step as vshlCode. No saturation on this path.
#  * Positive amount: `sat` is raised when the amount is at or above the
#    element width and the source is non-zero, or when the top shiftAmt + 1
#    bits of srcElem1 are not all copies of the sign (all ones for a negative
#    source, all zeros otherwise). When `sat` is set, qc is set and the result
#    is mask(width - 1), complemented if the source is negative. Otherwise the
#    left shift is performed (or 0 is written for a zero source with an
#    oversized amount).
#  * Zero: srcElem1 is copied unchanged.
# FpscrQc is read at the start and always written back.
# Registered as VqshlSD (2) and VqshlSQ (4) over signedTypes.
# NOTE(review): the op class is "SimdCmpOp" here but "SimdAluOp" for the
# unsigned variant -- confirm this difference is intended.
1835    vqshlSCode = '''
1836        int16_t shiftAmt = (int8_t)srcElem2;
1837        FPSCR fpscr = (FPSCR) FpscrQc;
1838        if (shiftAmt < 0) {
1839            shiftAmt = -shiftAmt;
1840            if (shiftAmt >= sizeof(Element) * 8) {
1841                shiftAmt = sizeof(Element) * 8 - 1;
1842                destElem = 0;
1843            } else {
1844                destElem = (srcElem1 >> shiftAmt);
1845            }
1846            // Make sure the right shift sign extended when it should.
1847            if (srcElem1 < 0 && destElem >= 0) {
1848                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1849                                             1 - shiftAmt));
1850            }
1851        } else if (shiftAmt > 0) {
1852            bool sat = false;
1853            if (shiftAmt >= sizeof(Element) * 8) {
1854                if (srcElem1 != 0)
1855                    sat = true;
1856                else
1857                    destElem = 0;
1858            } else {
1859                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1860                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1861                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1862                    sat = true;
1863                } else {
1864                    destElem = srcElem1 << shiftAmt;
1865                }
1866            }
1867            if (sat) {
1868                fpscr.qc = 1;
1869                destElem = mask(sizeof(Element) * 8 - 1);
1870                if (srcElem1 < 0)
1871                    destElem = ~destElem;
1872            }
1873        } else {
1874            destElem = srcElem1;
1875        }
1876        FpscrQc = fpscr;
1877    '''
1878    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1879    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1880
1881    vqrshlUCode = '''
1882        int16_t shiftAmt = (int8_t)srcElem2;
1883        FPSCR fpscr = (FPSCR) FpscrQc;
1884        if (shiftAmt < 0) {
1885            shiftAmt = -shiftAmt;
1886            Element rBit = 0;
1887            if (shiftAmt <= sizeof(Element) * 8)
1888                rBit = bits(srcElem1, shiftAmt - 1);
1889            if (shiftAmt >= sizeof(Element) * 8) {
1890                shiftAmt = sizeof(Element) * 8 - 1;
1891                destElem = 0;
1892            } else {
1893                destElem = (srcElem1 >> shiftAmt);
1894            }
1895            destElem += rBit;
1896        } else {
1897            if (shiftAmt >= sizeof(Element) * 8) {
1898                if (srcElem1 != 0) {
1899                    destElem = mask(sizeof(Element) * 8);
1900                    fpscr.qc = 1;
1901                } else {
1902                    destElem = 0;
1903                }
1904            } else {
1905                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1906                            sizeof(Element) * 8 - shiftAmt)) {
1907                    destElem = mask(sizeof(Element) * 8);
1908                    fpscr.qc = 1;
1909                } else {
1910                    destElem = srcElem1 << shiftAmt;
1911                }
1912            }
1913        }
1914        FpscrQc = fpscr;
1915    '''
1916    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1917    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1918
# vqrshl (signed): saturating, rounding shift by a per-element register
# amount (low byte of srcElem2 as a signed 8-bit value).
#  * Negative amount: right shift by its magnitude with the clamp and explicit
#    sign-fill step used by vshlCode, then rBit is added. rBit is the last bit
#    shifted out when the magnitude does not exceed the element width, and is
#    forced to 1 when the magnitude exceeds the width and srcElem1 is
#    negative (turning the all-ones sign fill into 0).
#  * Positive amount: `sat` is raised when the amount is at or above the
#    element width and the source is non-zero, or when the top shiftAmt + 1
#    bits of srcElem1 are not all copies of the sign. When `sat` is set, qc is
#    set and the result is mask(width - 1), complemented for a negative
#    source. Otherwise the left shift is performed.
#  * Zero: srcElem1 is copied unchanged.
# FpscrQc is read at the start and always written back.
# Registered as VqrshlSD (2) and VqrshlSQ (4) over signedTypes.
1919    vqrshlSCode = '''
1920        int16_t shiftAmt = (int8_t)srcElem2;
1921        FPSCR fpscr = (FPSCR) FpscrQc;
1922        if (shiftAmt < 0) {
1923            shiftAmt = -shiftAmt;
1924            Element rBit = 0;
1925            if (shiftAmt <= sizeof(Element) * 8)
1926                rBit = bits(srcElem1, shiftAmt - 1);
1927            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1928                rBit = 1;
1929            if (shiftAmt >= sizeof(Element) * 8) {
1930                shiftAmt = sizeof(Element) * 8 - 1;
1931                destElem = 0;
1932            } else {
1933                destElem = (srcElem1 >> shiftAmt);
1934            }
1935            // Make sure the right shift sign extended when it should.
1936            if (srcElem1 < 0 && destElem >= 0) {
1937                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1938                                             1 - shiftAmt));
1939            }
1940            destElem += rBit;
1941        } else if (shiftAmt > 0) {
1942            bool sat = false;
1943            if (shiftAmt >= sizeof(Element) * 8) {
1944                if (srcElem1 != 0)
1945                    sat = true;
1946                else
1947                    destElem = 0;
1948            } else {
1949                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1950                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1951                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1952                    sat = true;
1953                } else {
1954                    destElem = srcElem1 << shiftAmt;
1955                }
1956            }
1957            if (sat) {
1958                fpscr.qc = 1;
1959                destElem = mask(sizeof(Element) * 8 - 1);
1960                if (srcElem1 < 0)
1961                    destElem = ~destElem;
1962            }
1963        } else {
1964            destElem = srcElem1;
1965        }
1966        FpscrQc = fpscr;
1967    '''
1968    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1969    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1970
# Absolute-difference family. Every snippet subtracts the smaller source from
# the larger one so the difference is never negative.

# vaba: adds the absolute difference to the existing destElem. The trailing
# True is presumably the read-destination flag -- confirm against
# threeEqualRegInst. Registered as VabaD (2) / VabaQ (4) over allTypes.
1971    vabaCode = '''
1972        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1973                                            (srcElem2 - srcElem1);
1974    '''
1975    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1976    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: accumulating form whose subtraction is carried out in BigElement.
1977    vabalCode = '''
1978        destElem += (srcElem1 > srcElem2) ?
1979            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1980            ((BigElement)srcElem2 - (BigElement)srcElem1);
1981    '''
1982    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1983
# vabd: writes the absolute difference without accumulating. Registered as
# VabdD (2) / VabdQ (4) over allTypes.
1984    vabdCode = '''
1985        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1986                                           (srcElem2 - srcElem1);
1987    '''
1988    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1989    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: non-accumulating form whose subtraction is carried out in BigElement.
1990    vabdlCode = '''
1991        destElem = (srcElem1 > srcElem2) ?
1992            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1993            ((BigElement)srcElem2 - (BigElement)srcElem1);
1994    '''
1995    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1996
# vtst: bit test. Writes all ones when the two sources share at least one set
# bit (their AND is non-zero) and 0 otherwise.
# Registered as VtstD (2) and VtstQ (4) over unsignedTypes.
1997    vtstCode = '''
1998        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1999    '''
2000    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2001    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2002
# Integer multiply and multiply-accumulate.

# vmul: per-element product, registered as NVmulD (2) / NVmulQ (4) over
# allTypes.
2003    vmulCode = '''
2004        destElem = srcElem1 * srcElem2;
2005    '''
2006    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2007    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: both sources are cast to BigElement before multiplying.
2008    vmullCode = '''
2009        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2010    '''
2011    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2012
# vmla: adds the per-element product to the existing destElem. The trailing
# True is presumably the read-destination flag -- confirm against
# threeEqualRegInst. Registered as NVmlaD (2) / NVmlaQ (4) over allTypes.
2013    vmlaCode = '''
2014        destElem = destElem + srcElem1 * srcElem2;
2015    '''
2016    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2017    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: accumulating form whose product is formed in BigElement.
2018    vmlalCode = '''
2019        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2020    '''
2021    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2022
2023    vqdmlalCode = '''
2024        FPSCR fpscr = (FPSCR) FpscrQc;
2025        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2026        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2027        Element halfNeg = maxNeg / 2;
2028        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2029            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2030            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2031            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2032            fpscr.qc = 1;
2033        }
2034        bool negPreDest = ltz(destElem);
2035        destElem += midElem;
2036        bool negDest = ltz(destElem);
2037        bool negMid = ltz(midElem);
2038        if (negPreDest == negMid && negMid != negDest) {
2039            destElem = mask(sizeof(BigElement) * 8 - 1);
2040            if (negPreDest)
2041                destElem = ~destElem;
2042            fpscr.qc = 1;
2043        }
2044        FpscrQc = fpscr;
2045    '''
2046    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2047
2048    vqdmlslCode = '''
2049        FPSCR fpscr = (FPSCR) FpscrQc;
2050        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2051        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2052        Element halfNeg = maxNeg / 2;
2053        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2054            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2055            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2056            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2057            fpscr.qc = 1;
2058        }
2059        bool negPreDest = ltz(destElem);
2060        destElem -= midElem;
2061        bool negDest = ltz(destElem);
2062        bool posMid = ltz((BigElement)-midElem);
2063        if (negPreDest == posMid && posMid != negDest) {
2064            destElem = mask(sizeof(BigElement) * 8 - 1);
2065            if (negPreDest)
2066                destElem = ~destElem;
2067            fpscr.qc = 1;
2068        }
2069        FpscrQc = fpscr;
2070    '''
2071    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2072
# vqdmull: saturating doubling multiply (long). destElem is the doubled
# product 2 * srcElem1 * srcElem2 computed in 64-bit arithmetic. When both
# sources are equal to the value with only the sign bit set, the result is
# replaced by ~(srcElem1 << Element width) and qc is set. FpscrQc is read at
# the start and always written back.
2073    vqdmullCode = '''
2074        FPSCR fpscr = (FPSCR) FpscrQc;
2075        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2076        if (srcElem1 == srcElem2 &&
2077                srcElem1 == (Element)((Element)1 <<
2078                    (Element)(sizeof(Element) * 8 - 1))) {
2079            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2080            fpscr.qc = 1;
2081        }
2082        FpscrQc = fpscr;
2083    '''
2084    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2085
# vmls: subtracts the per-element product from the existing destElem. The
# trailing True is presumably the read-destination flag -- confirm against
# threeEqualRegInst. Registered as NVmlsD (2) / NVmlsQ (4) over allTypes.
2086    vmlsCode = '''
2087        destElem = destElem - srcElem1 * srcElem2;
2088    '''
2089    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2090    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: multiply-subtract whose product is formed in BigElement.
2091    vmlslCode = '''
2092        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2093    '''
2094    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2095
# vmul (polynomial): carry-less multiply. For every set bit j of srcElem2,
# srcElem1 shifted left by j is XORed into the result, so partial products
# are combined without carries. Registered under the "vmul" mnemonic as
# NVmulpD (2) / NVmulpQ (4) over unsignedTypes.
2096    vmulpCode = '''
2097        destElem = 0;
2098        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2099            if (bits(srcElem2, j))
2100                destElem ^= srcElem1 << j;
2101        }
2102    '''
2103    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2104    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# vmull (polynomial): same carry-less multiply, but srcElem1 is cast to
# BigElement before each shift so the shifted-out bits are retained.
# Registered under the "vmull" mnemonic as Vmullp over smallUnsignedTypes.
2105    vmullpCode = '''
2106        destElem = 0;
2107        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2108            if (bits(srcElem2, j))
2109                destElem ^= (BigElement)srcElem1 << j;
2110        }
2111    '''
2112    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2113
# vpmax / vpmin: pairwise integer max and min. They reuse the vmaxCode and
# vminCode snippets defined earlier with pairwise=True over smallTypes; only
# the D-sized (2) forms are registered.
2114    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2115
2116    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2117
# vqdmulh: saturating doubling multiply returning the high half. The doubled
# product is computed in 64-bit arithmetic and shifted right by the Element
# bit width. When both sources equal the value with only the sign bit set,
# the result is replaced by ~srcElem1 (every bit except the sign bit) and qc
# is set. FpscrQc is read at the start and always written back.
# Registered as VqdmulhD (2) and VqdmulhQ (4) over smallSignedTypes.
2118    vqdmulhCode = '''
2119        FPSCR fpscr = (FPSCR) FpscrQc;
2120        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2121                   (sizeof(Element) * 8);
2122        if (srcElem1 == srcElem2 &&
2123                srcElem1 == (Element)((Element)1 <<
2124                    (sizeof(Element) * 8 - 1))) {
2125            destElem = ~srcElem1;
2126            fpscr.qc = 1;
2127        }
2128        FpscrQc = fpscr;
2129    '''
2130    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2131    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2132
2133    vqrdmulhCode = '''
2134        FPSCR fpscr = (FPSCR) FpscrQc;
2135        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2136                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2137                   (sizeof(Element) * 8);
2138        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2139        Element halfNeg = maxNeg / 2;
2140        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2141            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2142            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2143            if (destElem < 0) {
2144                destElem = mask(sizeof(Element) * 8 - 1);
2145            } else {
2146                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2147            }
2148            fpscr.qc = 1;
2149        }
2150        FpscrQc = fpscr;
2151    '''
2152    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2153            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2154    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2155            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2156
# Floating-point max/min. These snippets work on whole registers
# (srcReg1/srcReg2/destReg) rather than elements and go through the
# threeEqualRegInstFp generator with ("float",) as the type list.

# vmax (fp): processNans() is called first and reports through `done` whether
# it already produced the result. If not, binaryOp() is invoked with fpMaxS.
# If it did and flushToZero(srcReg1, srcReg2) returns true, the idc bit is
# set. FpscrExc is read at the start and written back at the end.
# NOTE(review): processNans, binaryOp and flushToZero are defined elsewhere;
# the meaning of their boolean arguments is not visible here.
2157    vmaxfpCode = '''
2158        FPSCR fpscr = (FPSCR) FpscrExc;
2159        bool done;
2160        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2161        if (!done) {
2162            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2163                               true, true, VfpRoundNearest);
2164        } else if (flushToZero(srcReg1, srcReg2)) {
2165            fpscr.idc = 1;
2166        }
2167        FpscrExc = fpscr;
2168    '''
2169    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2170    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2171
# vmin (fp): same structure as vmaxfpCode, with fpMinS as the operation.
2172    vminfpCode = '''
2173        FPSCR fpscr = (FPSCR) FpscrExc;
2174        bool done;
2175        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2176        if (!done) {
2177            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2178                               true, true, VfpRoundNearest);
2179        } else if (flushToZero(srcReg1, srcReg2)) {
2180            fpscr.idc = 1;
2181        }
2182        FpscrExc = fpscr;
2183    '''
2184    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2185    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2186
# vpmax (fp): pairwise forms that reuse vmaxfpCode with pairwise=True.
2187    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2188                        2, vmaxfpCode, pairwise=True)
2189    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2190                        4, vmaxfpCode, pairwise=True)
2191
# vpmin (fp): pairwise forms that reuse vminfpCode with pairwise=True.
2192    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2193                        2, vminfpCode, pairwise=True)
2194    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2195                        4, vminfpCode, pairwise=True)
2196
# Floating-point add, subtract and multiply. Each snippet reads FpscrExc into
# a local FPSCR, performs a single binaryOp() call with VfpRoundNearest, and
# writes the FPSCR back.
# NOTE(review): the two `true` arguments to binaryOp are presumably
# flush-to-zero / default-NaN controls -- confirm against its definition.

# vadd (fp): binaryOp with fpAddS.
2197    vaddfpCode = '''
2198        FPSCR fpscr = (FPSCR) FpscrExc;
2199        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2200                           true, true, VfpRoundNearest);
2201        FpscrExc = fpscr;
2202    '''
2203    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2204    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2205
# vpadd (fp): pairwise forms that reuse vaddfpCode with pairwise=True.
2206    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2207                        2, vaddfpCode, pairwise=True)
2208    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2209                        4, vaddfpCode, pairwise=True)
2210
# vsub (fp): binaryOp with fpSubS (srcReg1 is the first operand).
2211    vsubfpCode = '''
2212        FPSCR fpscr = (FPSCR) FpscrExc;
2213        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2214                           true, true, VfpRoundNearest);
2215        FpscrExc = fpscr;
2216    '''
2217    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2218    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2219
# vmul (fp): binaryOp with fpMulS.
2220    vmulfpCode = '''
2221        FPSCR fpscr = (FPSCR) FpscrExc;
2222        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2223                           true, true, VfpRoundNearest);
2224        FpscrExc = fpscr;
2225    '''
2226    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2227    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2228
# vmla (fp): multiply-accumulate built from two separate binaryOp() calls --
# first the product srcReg1 * srcReg2 into `mid`, then mid + destReg -- both
# sharing the same local FPSCR, which is written back to FpscrExc at the end.
# The trailing True is presumably the read-destination flag -- confirm
# against threeEqualRegInstFp.
2229    vmlafpCode = '''
2230        FPSCR fpscr = (FPSCR) FpscrExc;
2231        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2232                             true, true, VfpRoundNearest);
2233        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2234                           true, true, VfpRoundNearest);
2235        FpscrExc = fpscr;
2236    '''
2237    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2238    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2239
# vmls (fp): multiply-subtract built the same way; the second binaryOp()
# computes destReg - mid (destReg is the first operand of fpSubS).
2240    vmlsfpCode = '''
2241        FPSCR fpscr = (FPSCR) FpscrExc;
2242        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2243                             true, true, VfpRoundNearest);
2244        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2245                           true, true, VfpRoundNearest);
2246        FpscrExc = fpscr;
2247    '''
2248    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2249    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2250
2251    vcgtfpCode = '''
2252        FPSCR fpscr = (FPSCR) FpscrExc;
2253        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2254                             true, true, VfpRoundNearest);
2255        destReg = (res == 0) ? -1 : 0;
2256        if (res == 2.0)
2257            fpscr.ioc = 1;
2258        FpscrExc = fpscr;
2259    '''
2260    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2261            2, vcgtfpCode, toInt = True)
2262    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2263            4, vcgtfpCode, toInt = True)
2264
# vcgefpCode: floating-point "vcge" between two register operands.
# Same shape as the other FP compares: vcgeFunc runs through binaryOp, a
# result of 0 maps to an all-ones destReg (-1), anything else maps to 0, and
# a result of 2.0 also sets fpscr.ioc (meaning of 2.0 is defined by vcgeFunc,
# not visible here -- presumably an invalid/NaN comparison).
2265    vcgefpCode = '''
2266        FPSCR fpscr = (FPSCR) FpscrExc;
2267        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2268                             true, true, VfpRoundNearest);
2269        destReg = (res == 0) ? -1 : 0;
2270        if (res == 2.0)
2271            fpscr.ioc = 1;
2272        FpscrExc = fpscr;
2273    '''
2274    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2275            2, vcgefpCode, toInt = True)
2276    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2277            4, vcgefpCode, toInt = True)
2278
# vacgtfpCode: floating-point "vacgt" between two register operands.
# The comparison itself lives in vacgtFunc (not visible here); this template
# only maps its result: 0 -> all-ones destReg (-1), otherwise 0, and a result
# of 2.0 also sets fpscr.ioc. The updated FPSCR is written back to FpscrExc.
2279    vacgtfpCode = '''
2280        FPSCR fpscr = (FPSCR) FpscrExc;
2281        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2282                             true, true, VfpRoundNearest);
2283        destReg = (res == 0) ? -1 : 0;
2284        if (res == 2.0)
2285            fpscr.ioc = 1;
2286        FpscrExc = fpscr;
2287    '''
2288    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2289            2, vacgtfpCode, toInt = True)
2290    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2291            4, vacgtfpCode, toInt = True)
2292
# vacgefpCode: floating-point "vacge" between two register operands.
# The comparison itself lives in vacgeFunc (not visible here); this template
# only maps its result: 0 -> all-ones destReg (-1), otherwise 0, and a result
# of 2.0 also sets fpscr.ioc. The updated FPSCR is written back to FpscrExc.
2293    vacgefpCode = '''
2294        FPSCR fpscr = (FPSCR) FpscrExc;
2295        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2296                             true, true, VfpRoundNearest);
2297        destReg = (res == 0) ? -1 : 0;
2298        if (res == 2.0)
2299            fpscr.ioc = 1;
2300        FpscrExc = fpscr;
2301    '''
2302    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2303            2, vacgefpCode, toInt = True)
2304    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2305            4, vacgefpCode, toInt = True)
2306
# vceqfpCode: floating-point "vceq" between two register operands.
# vceqFunc runs through binaryOp; a result of 0 maps to an all-ones destReg
# (-1), anything else maps to 0, and a result of 2.0 also sets fpscr.ioc
# (meaning of 2.0 is defined by vceqFunc -- presumably an invalid compare).
2307    vceqfpCode = '''
2308        FPSCR fpscr = (FPSCR) FpscrExc;
2309        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2310                             true, true, VfpRoundNearest);
2311        destReg = (res == 0) ? -1 : 0;
2312        if (res == 2.0)
2313            fpscr.ioc = 1;
2314        FpscrExc = fpscr;
2315    '''
2316    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2317            2, vceqfpCode, toInt = True)
2318    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2319            4, vceqfpCode, toInt = True)
2320
# vrecpsCode: floating-point "vrecps". The whole per-element operation is
# delegated to fpRecpsS via binaryOp; this template only supplies a local
# FPSCR copy for the call and writes it back to FpscrExc afterwards.
2321    vrecpsCode = '''
2322        FPSCR fpscr = (FPSCR) FpscrExc;
2323        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2324                           true, true, VfpRoundNearest);
2325        FpscrExc = fpscr;
2326    '''
2327    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2328    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2329
# vrsqrtsCode: floating-point "vrsqrts". The per-element operation is
# delegated to fpRSqrtsS via binaryOp; the local FPSCR copy used for the call
# is written back to FpscrExc afterwards.
2330    vrsqrtsCode = '''
2331        FPSCR fpscr = (FPSCR) FpscrExc;
2332        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2333                           true, true, VfpRoundNearest);
2334        FpscrExc = fpscr;
2335    '''
2336    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2337    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2338
# vabdfpCode: floating-point "vabd" (absolute difference).
# The difference srcReg1 - srcReg2 is computed with fpSubS through binaryOp
# (so FPSCR flags are accumulated), then fabs() is applied to that difference.
2339    vabdfpCode = '''
2340        FPSCR fpscr = (FPSCR) FpscrExc;
2341        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2342                             true, true, VfpRoundNearest);
2343        destReg = fabs(mid);
2344        FpscrExc = fpscr;
2345    '''
2346    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2347    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2348
# Second family of multiply / multiply-accumulate registrations. They reuse
# existing templates (vmlaCode, vmlafpCode, vmlalCode, ...) under class names
# ending in "s", "sD" or "sQ" and go through the twoEqualRegInst /
# twoEqualRegInstFp / twoRegLongInst helpers. These look like the by-scalar
# encodings -- confirm against the decoder.
# Accumulating forms pass a trailing True, plain multiplies do not.
# NOTE(review): vmla is registered for unsignedTypes while vmls and vmul use
# allTypes; check that this asymmetry is intended.
2349    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2350    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2351    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2352    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2353    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2354
2355    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2356    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2357    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2358    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2359    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2360
2361    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2362    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2363    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2364    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2365    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2366
# Saturating doubling multiplies; the high-half forms are limited to
# smallSignedTypes.
2367    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2368    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2369    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2370    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2371    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2372    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2373            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2374    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2375            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2376
# vshrCode: "vshr" (shift right by immediate), registered for allTypes.
# When imm is at least the element width in bits the shift is not performed;
# the result is -1 if ltz(srcElem1) is true and 0 otherwise. For smaller imm
# the result is simply srcElem1 >> imm.
2377    vshrCode = '''
2378        if (imm >= sizeof(srcElem1) * 8) {
2379            if (ltz(srcElem1))
2380                destElem = -1;
2381            else
2382                destElem = 0;
2383        } else {
2384            destElem = srcElem1 >> imm;
2385        }
2386    '''
2387    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2388    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2389
2390    vsraCode = '''
2391        Element mid;;
2392        if (imm >= sizeof(srcElem1) * 8) {
2393            mid = ltz(srcElem1) ? -1 : 0;
2394        } else {
2395            mid = srcElem1 >> imm;
2396            if (ltz(srcElem1) && !ltz(mid)) {
2397                mid |= -(mid & ((Element)1 <<
2398                            (sizeof(Element) * 8 - 1 - imm)));
2399            }
2400        }
2401        destElem += mid;
2402    '''
2403    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2404    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2405
# vrshrCode: "vrshr" (rounding shift right by immediate), allTypes.
# imm greater than the element width gives 0 and imm == 0 copies the source.
# Otherwise rBit is bit (imm - 1) of the source, i.e. the last bit shifted
# out, and it is added to the shifted value. The shift is split into
# (imm - 1) followed by 1 so that imm equal to the element width never turns
# into a single full-width shift.
2406    vrshrCode = '''
2407        if (imm > sizeof(srcElem1) * 8) {
2408            destElem = 0;
2409        } else if (imm) {
2410            Element rBit = bits(srcElem1, imm - 1);
2411            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2412        } else {
2413            destElem = srcElem1;
2414        }
2415    '''
2416    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2417    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2418
# vrsraCode: "vrsra" (rounding shift right by immediate and accumulate).
# Identical rounding-shift computation to the non-accumulating form, but each
# branch adds its value to the existing destElem: 0 when imm exceeds the
# element width, the rounded shift when imm is non-zero, and the unshifted
# source when imm == 0.
2419    vrsraCode = '''
2420        if (imm > sizeof(srcElem1) * 8) {
2421            destElem += 0;
2422        } else if (imm) {
2423            Element rBit = bits(srcElem1, imm - 1);
2424            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2425        } else {
2426            destElem += srcElem1;
2427        }
2428    '''
2429    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2430    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2431
# vsriCode: "vsri" (shift right and insert), unsignedTypes.
# With imm at least the element width, destElem is left unchanged.
# Otherwise the shifted source is OR-ed with the destElem bits selected by
# ~mask(width - imm), so the destination bits the shifted source does not
# cover are preserved (assuming mask(n) sets the low n bits -- confirm).
2432    vsriCode = '''
2433        if (imm >= sizeof(Element) * 8)
2434            destElem = destElem;
2435        else
2436            destElem = (srcElem1 >> imm) |
2437                (destElem & ~mask(sizeof(Element) * 8 - imm));
2438    '''
2439    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2440    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2441
# vshlCode: "vshl" (shift left by immediate), unsignedTypes.
# For imm at least the element width, the source is shifted by (width - 1)
# and then by 1, so the shift count of any single shift stays below the
# element width. Smaller imm uses a plain srcElem1 << imm.
2442    vshlCode = '''
2443        if (imm >= sizeof(Element) * 8)
2444            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2445        else
2446            destElem = srcElem1 << imm;
2447    '''
2448    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2449    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2450
# vsliCode: "vsli" (shift left and insert), unsignedTypes.
# With imm at least the element width, destElem is left unchanged.
# Otherwise the shifted source is OR-ed with destElem & mask(imm), so the
# destination bits selected by mask(imm) are preserved (assuming mask(n)
# sets the low n bits -- confirm).
2451    vsliCode = '''
2452        if (imm >= sizeof(Element) * 8)
2453            destElem = destElem;
2454        else
2455            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2456    '''
2457    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2458    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2459
# vqshlCode: "vqshl" (saturating shift left by immediate), signedTypes.
# Saturation value: 1 << (width - 1), inverted when the source is positive;
# fpscr.qc is set whenever saturation happens and FpscrQc is written back.
#  - imm >= width: any non-zero source saturates, zero stays zero.
#  - 0 < imm < width: topBits holds source bits [width-1 : width-1-imm]; the
#    shifted value is kept only if those bits are all zero or equal to
#    mask(imm + 1), otherwise the result saturates.
#  - imm == 0: the source is copied unchanged.
2460    vqshlCode = '''
2461        FPSCR fpscr = (FPSCR) FpscrQc;
2462        if (imm >= sizeof(Element) * 8) {
2463            if (srcElem1 != 0) {
2464                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2465                if (srcElem1 > 0)
2466                    destElem = ~destElem;
2467                fpscr.qc = 1;
2468            } else {
2469                destElem = 0;
2470            }
2471        } else if (imm) {
2472            destElem = (srcElem1 << imm);
2473            uint64_t topBits = bits((uint64_t)srcElem1,
2474                                    sizeof(Element) * 8 - 1,
2475                                    sizeof(Element) * 8 - 1 - imm);
2476            if (topBits != 0 && topBits != mask(imm + 1)) {
2477                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2478                if (srcElem1 > 0)
2479                    destElem = ~destElem;
2480                fpscr.qc = 1;
2481            }
2482        } else {
2483            destElem = srcElem1;
2484        }
2485        FpscrQc = fpscr;
2486    '''
2487    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2488    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2489
# vqshluCode: saturating shift left by immediate registered for
# unsignedTypes (class names NVqshluD/Q).
# Saturation value is mask(width); fpscr.qc is set whenever it is used.
#  - imm >= width: any non-zero source saturates, zero stays zero.
#  - 0 < imm < width: topBits holds source bits [width-1 : width-imm], i.e.
#    the bits that the shift discards; if any is set the result saturates.
#  - imm == 0: the source is copied unchanged.
2490    vqshluCode = '''
2491        FPSCR fpscr = (FPSCR) FpscrQc;
2492        if (imm >= sizeof(Element) * 8) {
2493            if (srcElem1 != 0) {
2494                destElem = mask(sizeof(Element) * 8);
2495                fpscr.qc = 1;
2496            } else {
2497                destElem = 0;
2498            }
2499        } else if (imm) {
2500            destElem = (srcElem1 << imm);
2501            uint64_t topBits = bits((uint64_t)srcElem1,
2502                                    sizeof(Element) * 8 - 1,
2503                                    sizeof(Element) * 8 - imm);
2504            if (topBits != 0) {
2505                destElem = mask(sizeof(Element) * 8);
2506                fpscr.qc = 1;
2507            }
2508        } else {
2509            destElem = srcElem1;
2510        }
2511        FpscrQc = fpscr;
2512    '''
2513    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2514    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2515
# vqshlusCode: saturating shift left by immediate registered for
# signedTypes (class names NVqshlusD/Q). The saturation bounds are 0 and
# mask(width), i.e. the result range is treated as unsigned.
# In every branch a negative source yields 0 with fpscr.qc set. For
# non-negative sources:
#  - imm >= width: any positive source saturates to mask(width), zero stays 0.
#  - 0 < imm < width: saturates to mask(width) if any of the discarded source
#    bits [width-1 : width-imm] is set, otherwise keeps srcElem1 << imm.
#  - imm == 0: the source is copied unchanged.
2516    vqshlusCode = '''
2517        FPSCR fpscr = (FPSCR) FpscrQc;
2518        if (imm >= sizeof(Element) * 8) {
2519            if (srcElem1 < 0) {
2520                destElem = 0;
2521                fpscr.qc = 1;
2522            } else if (srcElem1 > 0) {
2523                destElem = mask(sizeof(Element) * 8);
2524                fpscr.qc = 1;
2525            } else {
2526                destElem = 0;
2527            }
2528        } else if (imm) {
2529            destElem = (srcElem1 << imm);
2530            uint64_t topBits = bits((uint64_t)srcElem1,
2531                                    sizeof(Element) * 8 - 1,
2532                                    sizeof(Element) * 8 - imm);
2533            if (srcElem1 < 0) {
2534                destElem = 0;
2535                fpscr.qc = 1;
2536            } else if (topBits != 0) {
2537                destElem = mask(sizeof(Element) * 8);
2538                fpscr.qc = 1;
2539            }
2540        } else {
2541            if (srcElem1 < 0) {
2542                fpscr.qc = 1;
2543                destElem = 0;
2544            } else {
2545                destElem = srcElem1;
2546            }
2547        }
2548        FpscrQc = fpscr;
2549    '''
2550    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2551    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2552
# vshrnCode: "vshrn" (shift right by immediate, narrowing form), registered
# through twoRegNarrowShiftInst for smallUnsignedTypes.
# imm at least the width of srcElem1 gives 0; otherwise the result is
# srcElem1 >> imm. Any narrowing happens on the assignment to destElem
# (destElem's type is set by the helper, not visible here).
2553    vshrnCode = '''
2554        if (imm >= sizeof(srcElem1) * 8) {
2555            destElem = 0;
2556        } else {
2557            destElem = srcElem1 >> imm;
2558        }
2559    '''
2560    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2561
# vrshrnCode: "vrshrn" (rounding shift right by immediate, narrowing form).
# imm greater than the width of srcElem1 gives 0 and imm == 0 copies the
# source. Otherwise bit (imm - 1) of the source is added to the shifted value
# as the rounding increment; the shift is done as (imm - 1) then 1.
2562    vrshrnCode = '''
2563        if (imm > sizeof(srcElem1) * 8) {
2564            destElem = 0;
2565        } else if (imm) {
2566            Element rBit = bits(srcElem1, imm - 1);
2567            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2568        } else {
2569            destElem = srcElem1;
2570        }
2571    '''
2572    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2573
# vqshrnCode: "vqshrn" (saturating shift right, narrowing), smallSignedTypes.
#  - imm > width of srcElem1: result 0; fpscr.qc is set unless the source is
#    0 or -1.
#  - imm != 0: mid is the shifted source with the bit at (BigElement width -
#    1 - imm) propagated upwards. If mid does not survive a round trip through
#    Element, the result saturates to mask(Element width - 1), inverted for a
#    negative source, and fpscr.qc is set.
#  - imm == 0: the source is assigned directly.
# NOTE(review): unlike vqrshrnCode, the imm == 0 path performs no saturation
# check; confirm that imm == 0 cannot reach this template.
2574    vqshrnCode = '''
2575        FPSCR fpscr = (FPSCR) FpscrQc;
2576        if (imm > sizeof(srcElem1) * 8) {
2577            if (srcElem1 != 0 && srcElem1 != -1)
2578                fpscr.qc = 1;
2579            destElem = 0;
2580        } else if (imm) {
2581            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2582            mid |= -(mid & ((BigElement)1 <<
2583                        (sizeof(BigElement) * 8 - 1 - imm)));
2584            if (mid != (Element)mid) {
2585                destElem = mask(sizeof(Element) * 8 - 1);
2586                if (srcElem1 < 0)
2587                    destElem = ~destElem;
2588                fpscr.qc = 1;
2589            } else {
2590                destElem = mid;
2591            }
2592        } else {
2593            destElem = srcElem1;
2594        }
2595        FpscrQc = fpscr;
2596    '''
2597    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2598
# vqshrunCode: saturating narrowing shift right registered for
# smallUnsignedTypes under the mnemonic "vqshrun" (class NVqshrun).
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: mid is the shifted source; if it does not survive a round trip
#    through Element the result saturates to mask(Element width) with
#    fpscr.qc set.
#  - imm == 0: the source is assigned directly, without a saturation check.
2599    vqshrunCode = '''
2600        FPSCR fpscr = (FPSCR) FpscrQc;
2601        if (imm > sizeof(srcElem1) * 8) {
2602            if (srcElem1 != 0)
2603                fpscr.qc = 1;
2604            destElem = 0;
2605        } else if (imm) {
2606            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2607            if (mid != (Element)mid) {
2608                destElem = mask(sizeof(Element) * 8);
2609                fpscr.qc = 1;
2610            } else {
2611                destElem = mid;
2612            }
2613        } else {
2614            destElem = srcElem1;
2615        }
2616        FpscrQc = fpscr;
2617    '''
2618    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2619                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2620
# vqshrunsCode: saturating narrowing shift right registered for
# smallSignedTypes under the mnemonic "vqshrun" (class NVqshruns). The
# saturation bounds are 0 and mask(Element width).
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: if any bit of the shifted value above the Element width is
#    set, the result saturates (0 for a negative source, mask(Element width)
#    otherwise) and fpscr.qc is set; otherwise the shifted value is kept.
#  - imm == 0: the source is assigned directly, without a saturation check.
2621    vqshrunsCode = '''
2622        FPSCR fpscr = (FPSCR) FpscrQc;
2623        if (imm > sizeof(srcElem1) * 8) {
2624            if (srcElem1 != 0)
2625                fpscr.qc = 1;
2626            destElem = 0;
2627        } else if (imm) {
2628            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2629            if (bits(mid, sizeof(BigElement) * 8 - 1,
2630                          sizeof(Element) * 8) != 0) {
2631                if (srcElem1 < 0) {
2632                    destElem = 0;
2633                } else {
2634                    destElem = mask(sizeof(Element) * 8);
2635                }
2636                fpscr.qc = 1;
2637            } else {
2638                destElem = mid;
2639            }
2640        } else {
2641            destElem = srcElem1;
2642        }
2643        FpscrQc = fpscr;
2644    '''
2645    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2646                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2647
# vqrshrnCode: "vqrshrn" (saturating rounding shift right, narrowing),
# smallSignedTypes. Saturation value is mask(Element width - 1), inverted
# for a negative source; fpscr.qc is set whenever it is used.
#  - imm > width of srcElem1: result 0; fpscr.qc is set unless the source is
#    0 or -1.
#  - imm != 0: the source is shifted by (imm - 1), the low bit is captured as
#    the rounding bit, the value is shifted once more, the bit at (BigElement
#    width - 1 - imm) is propagated upwards, and the rounding bit is added.
#    The result saturates if it does not survive a round trip through Element.
#  - imm == 0: the source itself gets the same round-trip saturation check.
2648    vqrshrnCode = '''
2649        FPSCR fpscr = (FPSCR) FpscrQc;
2650        if (imm > sizeof(srcElem1) * 8) {
2651            if (srcElem1 != 0 && srcElem1 != -1)
2652                fpscr.qc = 1;
2653            destElem = 0;
2654        } else if (imm) {
2655            BigElement mid = (srcElem1 >> (imm - 1));
2656            uint64_t rBit = mid & 0x1;
2657            mid >>= 1;
2658            mid |= -(mid & ((BigElement)1 <<
2659                        (sizeof(BigElement) * 8 - 1 - imm)));
2660            mid += rBit;
2661            if (mid != (Element)mid) {
2662                destElem = mask(sizeof(Element) * 8 - 1);
2663                if (srcElem1 < 0)
2664                    destElem = ~destElem;
2665                fpscr.qc = 1;
2666            } else {
2667                destElem = mid;
2668            }
2669        } else {
2670            if (srcElem1 != (Element)srcElem1) {
2671                destElem = mask(sizeof(Element) * 8 - 1);
2672                if (srcElem1 < 0)
2673                    destElem = ~destElem;
2674                fpscr.qc = 1;
2675            } else {
2676                destElem = srcElem1;
2677            }
2678        }
2679        FpscrQc = fpscr;
2680    '''
2681    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2682                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2683
2684    vqrshrunCode = '''
2685        FPSCR fpscr = (FPSCR) FpscrQc;
2686        if (imm > sizeof(srcElem1) * 8) {
2687            if (srcElem1 != 0)
2688                fpscr.qc = 1;
2689            destElem = 0;
2690        } else if (imm) {
2691            BigElement mid = (srcElem1 >> (imm - 1));
2692            uint64_t rBit = mid & 0x1;
2693            mid >>= 1;
2694            mid += rBit;
2695            if (mid != (Element)mid) {
2696                destElem = mask(sizeof(Element) * 8);
2697                fpscr.qc = 1;
2698            } else {
2699                destElem = mid;
2700            }
2701        } else {
2702            if (srcElem1 != (Element)srcElem1) {
2703                destElem = mask(sizeof(Element) * 8 - 1);
2704                fpscr.qc = 1;
2705            } else {
2706                destElem = srcElem1;
2707            }
2708        }
2709        FpscrQc = fpscr;
2710    '''
2711    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2712                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2713
# vqrshrunsCode: saturating rounding narrowing shift right registered for
# smallSignedTypes under the mnemonic "vqrshrun" (class NVqrshruns). The
# saturation bounds are 0 and mask(Element width).
#  - imm > width of srcElem1: result 0; fpscr.qc is set for a non-zero source.
#  - imm != 0: rounding shift as in the other vqrshr* templates (shift by
#    imm - 1, capture the low bit, shift once more, propagate the bit at
#    BigElement width - 1 - imm upwards, add the rounding bit). If any bit
#    above the Element width is set the result saturates: 0 for a negative
#    source, mask(Element width) otherwise, with fpscr.qc set.
#  - imm == 0: a negative source gives 0 with fpscr.qc set; otherwise the
#    source is assigned directly.
2714    vqrshrunsCode = '''
2715        FPSCR fpscr = (FPSCR) FpscrQc;
2716        if (imm > sizeof(srcElem1) * 8) {
2717            if (srcElem1 != 0)
2718                fpscr.qc = 1;
2719            destElem = 0;
2720        } else if (imm) {
2721            BigElement mid = (srcElem1 >> (imm - 1));
2722            uint64_t rBit = mid & 0x1;
2723            mid >>= 1;
2724            mid |= -(mid & ((BigElement)1 <<
2725                            (sizeof(BigElement) * 8 - 1 - imm)));
2726            mid += rBit;
2727            if (bits(mid, sizeof(BigElement) * 8 - 1,
2728                          sizeof(Element) * 8) != 0) {
2729                if (srcElem1 < 0) {
2730                    destElem = 0;
2731                } else {
2732                    destElem = mask(sizeof(Element) * 8);
2733                }
2734                fpscr.qc = 1;
2735            } else {
2736                destElem = mid;
2737            }
2738        } else {
2739            if (srcElem1 < 0) {
2740                fpscr.qc = 1;
2741                destElem = 0;
2742            } else {
2743                destElem = srcElem1;
2744            }
2745        }
2746        FpscrQc = fpscr;
2747    '''
2748    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2749                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2750
# vshllCode: "vshll" (shift left, long form), registered through
# twoRegLongShiftInst for smallTypes.
# imm at least the width of destElem gives 0; otherwise the source is first
# cast to BigElement so the shift is carried out at the wider width.
2751    vshllCode = '''
2752        if (imm >= sizeof(destElem) * 8) {
2753            destElem = 0;
2754        } else {
2755            destElem = (BigElement)srcElem1 << imm;
2756        }
2757    '''
2758    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2759
# vmovlCode: "vmovl". The template is a plain element copy; it is registered
# through twoRegLongShiftInst, so any widening presumably comes from the
# element types that helper declares (not visible here -- confirm).
2760    vmovlCode = '''
2761        destElem = srcElem1;
2762    '''
2763    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2764
# vcvt2ufxCode: "vcvt" from float to fixed point via
# vfpFpSToFixed(srcElem1, false, false, imm); the sibling vcvt2sfxCode
# differs only in passing true as the second argument, so that flag
# presumably selects signedness (confirm in vfpFpSToFixed).
# fpscr.idc is set when flushToZero(srcElem1) reports true. The conversion is
# bracketed by prepFpState / finishVfp, and the empty asm statements with "m"
# constraints presumably keep the compiler from moving the conversion out of
# that bracket.
2765    vcvt2ufxCode = '''
2766        FPSCR fpscr = (FPSCR) FpscrExc;
2767        if (flushToZero(srcElem1))
2768            fpscr.idc = 1;
2769        VfpSavedState state = prepFpState(VfpRoundNearest);
2770        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2771        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2772        __asm__ __volatile__("" :: "m" (destReg));
2773        finishVfp(fpscr, state, true);
2774        FpscrExc = fpscr;
2775    '''
2776    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2777            2, vcvt2ufxCode, toInt = True)
2778    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2779            4, vcvt2ufxCode, toInt = True)
2780
# vcvt2sfxCode: "vcvt" from float to fixed point via
# vfpFpSToFixed(srcElem1, true, false, imm); the sibling vcvt2ufxCode
# differs only in passing false as the second argument, so that flag
# presumably selects signedness (confirm in vfpFpSToFixed).
# fpscr.idc is set when flushToZero(srcElem1) reports true. The conversion is
# bracketed by prepFpState / finishVfp, with empty asm statements around it
# (presumably compiler barriers).
2781    vcvt2sfxCode = '''
2782        FPSCR fpscr = (FPSCR) FpscrExc;
2783        if (flushToZero(srcElem1))
2784            fpscr.idc = 1;
2785        VfpSavedState state = prepFpState(VfpRoundNearest);
2786        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2787        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2788        __asm__ __volatile__("" :: "m" (destReg));
2789        finishVfp(fpscr, state, true);
2790        FpscrExc = fpscr;
2791    '''
2792    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2793            2, vcvt2sfxCode, toInt = True)
2794    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2795            4, vcvt2sfxCode, toInt = True)
2796
# vcvtu2fpCode: "vcvt" from fixed point to float using vfpUFixedToFpS
# (the name suggests an unsigned source -- confirm in the helper).
# The conversion runs between prepFpState and finishVfp, with empty asm
# statements around it (presumably compiler barriers), and the FPSCR copy is
# written back to FpscrExc. fromInt = True is passed, presumably because the
# source register holds an integer value.
2797    vcvtu2fpCode = '''
2798        FPSCR fpscr = (FPSCR) FpscrExc;
2799        VfpSavedState state = prepFpState(VfpRoundNearest);
2800        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2801        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2802        __asm__ __volatile__("" :: "m" (destElem));
2803        finishVfp(fpscr, state, true);
2804        FpscrExc = fpscr;
2805    '''
2806    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2807            2, vcvtu2fpCode, fromInt = True)
2808    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2809            4, vcvtu2fpCode, fromInt = True)
2810
# vcvts2fpCode: "vcvt" from fixed point to float using vfpSFixedToFpS
# (the name suggests a signed source -- confirm in the helper).
# The conversion runs between prepFpState and finishVfp, with empty asm
# statements around it (presumably compiler barriers), and the FPSCR copy is
# written back to FpscrExc.
2811    vcvts2fpCode = '''
2812        FPSCR fpscr = (FPSCR) FpscrExc;
2813        VfpSavedState state = prepFpState(VfpRoundNearest);
2814        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2815        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2816        __asm__ __volatile__("" :: "m" (destElem));
2817        finishVfp(fpscr, state, true);
2818        FpscrExc = fpscr;
2819    '''
2820    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2821            2, vcvts2fpCode, fromInt = True)
2822    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2823            4, vcvts2fpCode, fromInt = True)
2824
# vcvts2hCode: "vcvt" narrowing conversion built on vcvtFpSFpH (by its name,
# single precision to half precision -- confirm in the helper).
# The source element arrives as raw bits and is reinterpreted with bitsToFp;
# fpscr.idc is set when flushToZero(srcFp1) reports true. fpscr.ahp is
# forwarded to the conversion helper. The conversion runs between
# prepFpState and finishVfp.
2825    vcvts2hCode = '''
2826        FPSCR fpscr = (FPSCR) FpscrExc;
2827        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2828        if (flushToZero(srcFp1))
2829            fpscr.idc = 1;
2830        VfpSavedState state = prepFpState(VfpRoundNearest);
2831        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2832                                : "m" (srcFp1), "m" (destElem));
2833        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2834                              fpscr.ahp, srcFp1);
2835        __asm__ __volatile__("" :: "m" (destElem));
2836        finishVfp(fpscr, state, true);
2837        FpscrExc = fpscr;
2838    '''
2839    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2840
# vcvth2sCode: "vcvt" widening conversion built on vcvtFpHFpS (by its name,
# half precision to single precision -- confirm in the helper).
# The helper's float result is stored as raw bits via fpToBits. fpscr.ahp is
# forwarded to the helper, and the conversion runs between prepFpState and
# finishVfp.
2841    vcvth2sCode = '''
2842        FPSCR fpscr = (FPSCR) FpscrExc;
2843        VfpSavedState state = prepFpState(VfpRoundNearest);
2844        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2845                                : "m" (srcElem1), "m" (destElem));
2846        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2847        __asm__ __volatile__("" :: "m" (destElem));
2848        finishVfp(fpscr, state, true);
2849        FpscrExc = fpscr;
2850    '''
2851    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2852
# vrsqrteCode: integer "vrsqrte", registered for uint32_t elements only.
# The per-element work is delegated entirely to unsignedRSqrtEstimate.
2853    vrsqrteCode = '''
2854        destElem = unsignedRSqrtEstimate(srcElem1);
2855    '''
2856    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2857    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2858
# vrsqrtefpCode: floating-point "vrsqrte".
# fpscr.idc is set when flushToZero(srcReg1) reports true; the estimate
# itself comes from fprSqrtEstimate, which also receives the FPSCR copy.
# The copy is written back to FpscrExc afterwards.
2859    vrsqrtefpCode = '''
2860        FPSCR fpscr = (FPSCR) FpscrExc;
2861        if (flushToZero(srcReg1))
2862            fpscr.idc = 1;
2863        destReg = fprSqrtEstimate(fpscr, srcReg1);
2864        FpscrExc = fpscr;
2865    '''
2866    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2867    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2868
# vrecpeCode: integer "vrecpe", registered for uint32_t elements only.
# The per-element work is delegated entirely to unsignedRecipEstimate.
2869    vrecpeCode = '''
2870        destElem = unsignedRecipEstimate(srcElem1);
2871    '''
2872    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2873    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2874
# vrecpefpCode: floating-point "vrecpe".
# fpscr.idc is set when flushToZero(srcReg1) reports true; the estimate
# itself comes from fpRecipEstimate, which also receives the FPSCR copy.
# The copy is written back to FpscrExc afterwards.
2875    vrecpefpCode = '''
2876        FPSCR fpscr = (FPSCR) FpscrExc;
2877        if (flushToZero(srcReg1))
2878            fpscr.idc = 1;
2879        destReg = fpRecipEstimate(fpscr, srcReg1);
2880        FpscrExc = fpscr;
2881    '''
2882    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2883    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2884
# vrev16Code: "vrev16", registered for uint8_t elements.
# groupSize is the number of elements in a 2-byte group ((1 << 1) bytes
# divided by the element size). XOR-ing the index i with (groupSize - 1)
# mirrors the position inside its group and the result is stored in j.
# i and j are not declared here; presumably the twoRegMiscInst helper
# provides them as source and destination element indices -- confirm.
2885    vrev16Code = '''
2886        destElem = srcElem1;
2887        unsigned groupSize = ((1 << 1) / sizeof(Element));
2888        unsigned reverseMask = (groupSize - 1);
2889        j = i ^ reverseMask;
2890    '''
2891    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2892    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# vrev32Code: "vrev32", registered for uint8_t and uint16_t elements.
# groupSize is the number of elements in a 4-byte group ((1 << 2) bytes
# divided by the element size). XOR-ing the index i with (groupSize - 1)
# mirrors the position inside its group and the result is stored in j.
# i and j are presumably supplied by the twoRegMiscInst helper -- confirm.
2893    vrev32Code = '''
2894        destElem = srcElem1;
2895        unsigned groupSize = ((1 << 2) / sizeof(Element));
2896        unsigned reverseMask = (groupSize - 1);
2897        j = i ^ reverseMask;
2898    '''
2899    twoRegMiscInst("vrev32", "NVrev32D",
2900            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2901    twoRegMiscInst("vrev32", "NVrev32Q",
2902            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# vrev64Code: "vrev64", registered for smallUnsignedTypes.
# groupSize is the number of elements in an 8-byte group ((1 << 3) bytes
# divided by the element size). XOR-ing the index i with (groupSize - 1)
# mirrors the position inside its group and the result is stored in j.
# i and j are presumably supplied by the twoRegMiscInst helper -- confirm.
2903    vrev64Code = '''
2904        destElem = srcElem1;
2905        unsigned groupSize = ((1 << 3) / sizeof(Element));
2906        unsigned reverseMask = (groupSize - 1);
2907        j = i ^ reverseMask;
2908    '''
2909    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2910    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2911
# vpaddlCode: "vpaddl". Two source elements are each cast to BigElement and
# summed, so the addition is carried out at the wider width. Which elements
# are paired as srcElem1 / srcElem2 is decided by twoRegCondenseInst.
2912    vpaddlCode = '''
2913        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2914    '''
2915    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2916    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2917
# vpadalCode: "vpadal". Two source elements are each cast to BigElement,
# summed, and the sum is added to the existing destElem. The trailing True in
# the registrations presumably tells twoRegCondenseInst to read the
# destination first -- confirm against the helper.
2918    vpadalCode = '''
2919        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2920    '''
2921    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2922    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2923
# vclsCode: "vcls", signedTypes. Counts how many bits directly below the top
# bit have the same value as the top bit.
# The source is shifted left once up front so the original top bit itself is
# not counted; the loop then keeps shifting while the current top bit still
# matches the original sign, capped at (element width - 1) iterations.
2924    vclsCode = '''
2925        unsigned count = 0;
2926        if (srcElem1 < 0) {
2927            srcElem1 <<= 1;
2928            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2929                count++;
2930                srcElem1 <<= 1;
2931            }
2932        } else {
2933            srcElem1 <<= 1;
2934            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2935                count++;
2936                srcElem1 <<= 1;
2937            }
2938        }
2939        destElem = count;
2940    '''
2941    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2942    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2943
# vclzCode: "vclz", registered for signedTypes so that "srcElem1 >= 0" tests
# whether the top bit is clear. The loop shifts left and counts until the
# top bit becomes set or the count reaches the element width.
2944    vclzCode = '''
2945        unsigned count = 0;
2946        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2947            count++;
2948            srcElem1 <<= 1;
2949        }
2950        destElem = count;
2951    '''
2952    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2953    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2954
# vcntCode: "vcnt", unsignedTypes. Counts the set bits of the element by
# repeatedly adding the low bit to the count and shifting right, stopping
# once the value is zero or after element-width iterations.
2955    vcntCode = '''
2956        unsigned count = 0;
2957        while (srcElem1 && count < sizeof(Element) * 8) {
2958            count += srcElem1 & 0x1;
2959            srcElem1 >>= 1;
2960        }
2961        destElem = count;
2962    '''
2963
2964    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2965    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2966
# vmvnCode: "vmvn", bitwise NOT of the source element. Only uint64_t is
# registered; since the operation is bitwise, the element size does not
# affect the result.
2967    vmvnCode = '''
2968        destElem = ~srcElem1;
2969    '''
2970    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2971    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2972
# vqabsCode: "vqabs" (saturating absolute value), signedTypes.
# The only input that cannot be negated is the one with just the top bit set
# (1 << (width - 1)); for it the result is ~srcElem1, i.e. every bit set
# except the top one, and fpscr.qc is raised. Other negative inputs are
# negated and non-negative inputs are copied.
2973    vqabsCode = '''
2974        FPSCR fpscr = (FPSCR) FpscrQc;
2975        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2976            fpscr.qc = 1;
2977            destElem = ~srcElem1;
2978        } else if (srcElem1 < 0) {
2979            destElem = -srcElem1;
2980        } else {
2981            destElem = srcElem1;
2982        }
2983        FpscrQc = fpscr;
2984    '''
2985    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2986    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2987
# vqnegCode: "vqneg" (saturating negate), signedTypes.
# The input with only the top bit set (1 << (width - 1)) cannot be negated;
# for it the result is ~srcElem1 and fpscr.qc is raised. Every other input is
# simply negated.
2988    vqnegCode = '''
2989        FPSCR fpscr = (FPSCR) FpscrQc;
2990        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2991            fpscr.qc = 1;
2992            destElem = ~srcElem1;
2993        } else {
2994            destElem = -srcElem1;
2995        }
2996        FpscrQc = fpscr;
2997    '''
2998    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2999    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3000
# vabsCode: integer "vabs", signedTypes. Negative inputs are negated, other
# inputs are copied. Unlike vqabsCode there is no saturation check and no
# FPSCR access.
3001    vabsCode = '''
3002        if (srcElem1 < 0) {
3003            destElem = -srcElem1;
3004        } else {
3005            destElem = srcElem1;
3006        }
3007    '''
3008
3009    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3010    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabsfpCode: floating-point "vabs". The float is reinterpreted as a
# uint32_t through a union and AND-ed with mask(sizeof(Element) * 8 - 1),
# which drops the top bit of the encoding (assuming mask(n) sets the low n
# bits -- confirm). No FPSCR access takes place.
3011    vabsfpCode = '''
3012        union
3013        {
3014            uint32_t i;
3015            float f;
3016        } cStruct;
3017        cStruct.f = srcReg1;
3018        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3019        destReg = cStruct.f;
3020    '''
3021    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3022    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3023
# vnegCode: integer "vneg", signedTypes. Plain negation of the element with
# no saturation check (compare vqnegCode) and no FPSCR access.
3024    vnegCode = '''
3025        destElem = -srcElem1;
3026    '''
3027    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3028    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vnegfpCode: floating-point "vneg". Uses the C++ unary minus directly on
# the source register value; FPSCR is neither read nor written.
3029    vnegfpCode = '''
3030        destReg = -srcReg1;
3031    '''
3032    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3033    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3034
# vcgtCode: integer "vcgt" against zero, signedTypes. The result is
# mask(element width) when the element is greater than 0, otherwise 0.
3035    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3036    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3037    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
# vcgtfpCode (compare with zero): floating-point "vcgt" of srcReg1 against
# the constant (FloatReg)0.0. This rebinds the Python name vcgtfpCode, which
# earlier held the register-register compare template.
# vcgtFunc runs through binaryOp; a result of 0 gives an all-ones destReg
# (-1), anything else gives 0, and a result of 2.0 also sets fpscr.ioc
# (meaning of 2.0 is defined by vcgtFunc -- presumably an invalid compare).
3038    vcgtfpCode = '''
3039        FPSCR fpscr = (FPSCR) FpscrExc;
3040        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3041                             true, true, VfpRoundNearest);
3042        destReg = (res == 0) ? -1 : 0;
3043        if (res == 2.0)
3044            fpscr.ioc = 1;
3045        FpscrExc = fpscr;
3046    '''
3047    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3048            2, vcgtfpCode, toInt = True)
3049    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3050            4, vcgtfpCode, toInt = True)
3051
3052    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3053    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3054    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3055    vcgefpCode = '''
3056        FPSCR fpscr = (FPSCR) FpscrExc;
3057        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3058                             true, true, VfpRoundNearest);
3059        destReg = (res == 0) ? -1 : 0;
3060        if (res == 2.0)
3061            fpscr.ioc = 1;
3062        FpscrExc = fpscr;
3063    '''
3064    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3065            2, vcgefpCode, toInt = True)
3066    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3067            4, vcgefpCode, toInt = True)
3068
3069    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3070    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3071    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3072    vceqfpCode = '''
3073        FPSCR fpscr = (FPSCR) FpscrExc;
3074        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3075                             true, true, VfpRoundNearest);
3076        destReg = (res == 0) ? -1 : 0;
3077        if (res == 2.0)
3078            fpscr.ioc = 1;
3079        FpscrExc = fpscr;
3080    '''
3081    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3082            2, vceqfpCode, toInt = True)
3083    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3084            4, vceqfpCode, toInt = True)
3085
3086    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3087    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3088    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3089    vclefpCode = '''
3090        FPSCR fpscr = (FPSCR) FpscrExc;
3091        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3092                             true, true, VfpRoundNearest);
3093        destReg = (res == 0) ? -1 : 0;
3094        if (res == 2.0)
3095            fpscr.ioc = 1;
3096        FpscrExc = fpscr;
3097    '''
3098    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3099            2, vclefpCode, toInt = True)
3100    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3101            4, vclefpCode, toInt = True)
3102
3103    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3104    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3105    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3106    vcltfpCode = '''
3107        FPSCR fpscr = (FPSCR) FpscrExc;
3108        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3109                             true, true, VfpRoundNearest);
3110        destReg = (res == 0) ? -1 : 0;
3111        if (res == 2.0)
3112            fpscr.ioc = 1;
3113        FpscrExc = fpscr;
3114    '''
3115    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3116            2, vcltfpCode, toInt = True)
3117    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3118            4, vcltfpCode, toInt = True)
3119
3120    vswpCode = '''
3121        FloatRegBits mid;
3122        for (unsigned r = 0; r < rCount; r++) {
3123            mid = srcReg1.regs[r];
3124            srcReg1.regs[r] = destReg.regs[r];
3125            destReg.regs[r] = mid;
3126        }
3127    '''
3128    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3129    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3130
3131    vtrnCode = '''
3132        Element mid;
3133        for (unsigned i = 0; i < eCount; i += 2) {
3134            mid = srcReg1.elements[i];
3135            srcReg1.elements[i] = destReg.elements[i + 1];
3136            destReg.elements[i + 1] = mid;
3137        }
3138    '''
3139    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3140            smallUnsignedTypes, 2, vtrnCode)
3141    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3142            smallUnsignedTypes, 4, vtrnCode)
3143
3144    vuzpCode = '''
3145        Element mid[eCount];
3146        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3147        for (unsigned i = 0; i < eCount / 2; i++) {
3148            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3149            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3150            destReg.elements[i] = destReg.elements[2 * i];
3151        }
3152        for (unsigned i = 0; i < eCount / 2; i++) {
3153            destReg.elements[eCount / 2 + i] = mid[2 * i];
3154        }
3155    '''
3156    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3157    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3158
3159    vzipCode = '''
3160        Element mid[eCount];
3161        memcpy(&mid, &destReg, sizeof(destReg));
3162        for (unsigned i = 0; i < eCount / 2; i++) {
3163            destReg.elements[2 * i] = mid[i];
3164            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3165        }
3166        for (int i = 0; i < eCount / 2; i++) {
3167            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3168            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3169        }
3170    '''
3171    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3172    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3173
3174    vmovnCode = 'destElem = srcElem1;'
3175    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3176
3177    vdupCode = 'destElem = srcElem1;'
3178    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3179    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3180
3181    def vdupGprInst(name, Name, opClass, types, rCount):
3182        global header_output, exec_output
3183        eWalkCode = '''
3184        RegVect destReg;
3185        for (unsigned i = 0; i < eCount; i++) {
3186            destReg.elements[i] = htog((Element)Op1);
3187        }
3188        '''
3189        for reg in range(rCount):
3190            eWalkCode += '''
3191            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3192            ''' % { "reg" : reg }
3193        iop = InstObjParams(name, Name,
3194                            "RegRegOp",
3195                            { "code": eWalkCode,
3196                              "r_count": rCount,
3197                              "predicate_test": predicateTest,
3198                              "op_class": opClass }, [])
3199        header_output += NeonRegRegOpDeclare.subst(iop)
3200        exec_output += NeonEqualRegExecute.subst(iop)
3201        for type in types:
3202            substDict = { "targs" : type,
3203                          "class_name" : Name }
3204            exec_output += NeonExecDeclare.subst(substDict)
3205    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3206    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3207
3208    vmovCode = 'destElem = imm;'
3209    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3210    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3211
3212    vorrCode = 'destElem |= imm;'
3213    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3214    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3215
3216    vmvnCode = 'destElem = ~imm;'
3217    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3218    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3219
3220    vbicCode = 'destElem &= ~imm;'
3221    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3222    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3223
3224    vqmovnCode = '''
3225    FPSCR fpscr = (FPSCR) FpscrQc;
3226    destElem = srcElem1;
3227    if ((BigElement)destElem != srcElem1) {
3228        fpscr.qc = 1;
3229        destElem = mask(sizeof(Element) * 8 - 1);
3230        if (srcElem1 < 0)
3231            destElem = ~destElem;
3232    }
3233    FpscrQc = fpscr;
3234    '''
3235    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3236
3237    vqmovunCode = '''
3238    FPSCR fpscr = (FPSCR) FpscrQc;
3239    destElem = srcElem1;
3240    if ((BigElement)destElem != srcElem1) {
3241        fpscr.qc = 1;
3242        destElem = mask(sizeof(Element) * 8);
3243    }
3244    FpscrQc = fpscr;
3245    '''
3246    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3247            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3248
3249    vqmovunsCode = '''
3250    FPSCR fpscr = (FPSCR) FpscrQc;
3251    destElem = srcElem1;
3252    if (srcElem1 < 0 ||
3253            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3254        fpscr.qc = 1;
3255        destElem = mask(sizeof(Element) * 8);
3256        if (srcElem1 < 0)
3257            destElem = ~destElem;
3258    }
3259    FpscrQc = fpscr;
3260    '''
3261    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3262            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3263
3264    def buildVext(name, Name, opClass, types, rCount, op):
3265        global header_output, exec_output
3266        eWalkCode = '''
3267        RegVect srcReg1, srcReg2, destReg;
3268        '''
3269        for reg in range(rCount):
3270            eWalkCode += simdEnabledCheckCode + '''
3271                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3272                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3273            ''' % { "reg" : reg }
3274        eWalkCode += op
3275        for reg in range(rCount):
3276            eWalkCode += '''
3277            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3278            ''' % { "reg" : reg }
3279        iop = InstObjParams(name, Name,
3280                            "RegRegRegImmOp",
3281                            { "code": eWalkCode,
3282                              "r_count": rCount,
3283                              "predicate_test": predicateTest,
3284                              "op_class": opClass }, [])
3285        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3286        exec_output += NeonEqualRegExecute.subst(iop)
3287        for type in types:
3288            substDict = { "targs" : type,
3289                          "class_name" : Name }
3290            exec_output += NeonExecDeclare.subst(substDict)
3291
3292    vextCode = '''
3293        for (unsigned i = 0; i < eCount; i++) {
3294            unsigned index = i + imm;
3295            if (index < eCount) {
3296                destReg.elements[i] = srcReg1.elements[index];
3297            } else {
3298                index -= eCount;
3299                if (index >= eCount)
3300#if FULL_SYSTEM
3301                    fault = new UndefinedInstruction;
3302#else
3303                    fault = new UndefinedInstruction(false, mnemonic);
3304#endif
3305                else
3306                    destReg.elements[i] = srcReg2.elements[index];
3307            }
3308        }
3309    '''
3310    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3311    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3312
3313    def buildVtbxl(name, Name, opClass, length, isVtbl):
3314        global header_output, decoder_output, exec_output
3315        code = '''
3316            union
3317            {
3318                uint8_t bytes[32];
3319                FloatRegBits regs[8];
3320            } table;
3321
3322            union
3323            {
3324                uint8_t bytes[8];
3325                FloatRegBits regs[2];
3326            } destReg, srcReg2;
3327
3328            const unsigned length = %(length)d;
3329            const bool isVtbl = %(isVtbl)s;
3330
3331            srcReg2.regs[0] = htog(FpOp2P0_uw);
3332            srcReg2.regs[1] = htog(FpOp2P1_uw);
3333
3334            destReg.regs[0] = htog(FpDestP0_uw);
3335            destReg.regs[1] = htog(FpDestP1_uw);
3336        ''' % { "length" : length, "isVtbl" : isVtbl }
3337        for reg in range(8):
3338            if reg < length * 2:
3339                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3340                        { "reg" : reg }
3341            else:
3342                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3343        code += '''
3344        for (unsigned i = 0; i < sizeof(destReg); i++) {
3345            uint8_t index = srcReg2.bytes[i];
3346            if (index < 8 * length) {
3347                destReg.bytes[i] = table.bytes[index];
3348            } else {
3349                if (isVtbl)
3350                    destReg.bytes[i] = 0;
3351                // else destReg.bytes[i] unchanged
3352            }
3353        }
3354
3355        FpDestP0_uw = gtoh(destReg.regs[0]);
3356        FpDestP1_uw = gtoh(destReg.regs[1]);
3357        '''
3358        iop = InstObjParams(name, Name,
3359                            "RegRegRegOp",
3360                            { "code": code,
3361                              "predicate_test": predicateTest,
3362                              "op_class": opClass }, [])
3363        header_output += RegRegRegOpDeclare.subst(iop)
3364        decoder_output += RegRegRegOpConstructor.subst(iop)
3365        exec_output += PredOpExecute.subst(iop)
3366
3367    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3368    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3369    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3370    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3371
3372    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3373    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3374    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3375    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3376}};
3377