neon.isa revision 7760:e93e7e0caae1
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        assert(imm >= 0 && imm < eCount);
875        for (unsigned i = 0; i < eCount; i++) {
876            Element srcElem1 = gtoh(srcReg1.elements[i]);
877            Element srcElem2 = gtoh(srcReg2.elements[imm]);
878            Element destElem;
879            %(readDest)s
880            %(op)s
881            destReg.elements[i] = htog(destElem);
882        }
883        ''' % { "op" : op, "readDest" : readDestCode }
884        for reg in range(rCount):
885            eWalkCode += '''
886            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
887            ''' % { "reg" : reg }
888        iop = InstObjParams(name, Name,
889                            "RegRegRegImmOp",
890                            { "code": eWalkCode,
891                              "r_count": rCount,
892                              "predicate_test": predicateTest,
893                              "op_class": opClass }, [])
894        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
895        exec_output += NeonEqualRegExecute.subst(iop)
896        for type in types:
897            substDict = { "targs" : type,
898                          "class_name" : Name }
899            exec_output += NeonExecDeclare.subst(substDict)
900
901    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
902        global header_output, exec_output
903        rCount = 2
904        eWalkCode = simdEnabledCheckCode + '''
905        RegVect srcReg1, srcReg2;
906        BigRegVect destReg;
907        '''
908        for reg in range(rCount):
909            eWalkCode += '''
910                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
911                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);;
912            ''' % { "reg" : reg }
913        if readDest:
914            for reg in range(2 * rCount):
915                eWalkCode += '''
916                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
917                ''' % { "reg" : reg }
918        readDestCode = ''
919        if readDest:
920            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
921        eWalkCode += '''
922        assert(imm >= 0 && imm < eCount);
923        for (unsigned i = 0; i < eCount; i++) {
924            Element srcElem1 = gtoh(srcReg1.elements[i]);
925            Element srcElem2 = gtoh(srcReg2.elements[imm]);
926            BigElement destElem;
927            %(readDest)s
928            %(op)s
929            destReg.elements[i] = htog(destElem);
930        }
931        ''' % { "op" : op, "readDest" : readDestCode }
932        for reg in range(2 * rCount):
933            eWalkCode += '''
934            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
935            ''' % { "reg" : reg }
936        iop = InstObjParams(name, Name,
937                            "RegRegRegImmOp",
938                            { "code": eWalkCode,
939                              "r_count": rCount,
940                              "predicate_test": predicateTest,
941                              "op_class": opClass }, [])
942        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
943        exec_output += NeonUnequalRegExecute.subst(iop)
944        for type in types:
945            substDict = { "targs" : type,
946                          "class_name" : Name }
947            exec_output += NeonExecDeclare.subst(substDict)
948
949    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
950        global header_output, exec_output
951        eWalkCode = simdEnabledCheckCode + '''
952        typedef FloatReg FloatVect[rCount];
953        FloatVect srcRegs1, srcRegs2, destRegs;
954        '''
955        for reg in range(rCount):
956            eWalkCode += '''
957                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
958                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
959            ''' % { "reg" : reg }
960            if readDest:
961                eWalkCode += '''
962                    destRegs[%(reg)d] = FpDestP%(reg)d;
963                ''' % { "reg" : reg }
964        readDestCode = ''
965        if readDest:
966            readDestCode = 'destReg = destRegs[i];'
967        eWalkCode += '''
968        assert(imm >= 0 && imm < rCount);
969        for (unsigned i = 0; i < rCount; i++) {
970            FloatReg srcReg1 = srcRegs1[i];
971            FloatReg srcReg2 = srcRegs2[imm];
972            FloatReg destReg;
973            %(readDest)s
974            %(op)s
975            destRegs[i] = destReg;
976        }
977        ''' % { "op" : op, "readDest" : readDestCode }
978        for reg in range(rCount):
979            eWalkCode += '''
980            FpDestP%(reg)d = destRegs[%(reg)d];
981            ''' % { "reg" : reg }
982        iop = InstObjParams(name, Name,
983                            "FpRegRegRegImmOp",
984                            { "code": eWalkCode,
985                              "r_count": rCount,
986                              "predicate_test": predicateTest,
987                              "op_class": opClass }, [])
988        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
989        exec_output += NeonEqualRegExecute.subst(iop)
990        for type in types:
991            substDict = { "targs" : type,
992                          "class_name" : Name }
993            exec_output += NeonExecDeclare.subst(substDict)
994
995    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
996            readDest=False, toInt=False, fromInt=False):
997        global header_output, exec_output
998        eWalkCode = simdEnabledCheckCode + '''
999        RegVect srcRegs1, destRegs;
1000        '''
1001        for reg in range(rCount):
1002            eWalkCode += '''
1003                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1004            ''' % { "reg" : reg }
1005            if readDest:
1006                eWalkCode += '''
1007                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1008                ''' % { "reg" : reg }
1009        readDestCode = ''
1010        if readDest:
1011            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1012            if toInt:
1013                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1014        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1015        if fromInt:
1016            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1017        declDest = 'Element destElem;'
1018        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1019        if toInt:
1020            declDest = 'FloatRegBits destReg;'
1021            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1022        eWalkCode += '''
1023        for (unsigned i = 0; i < eCount; i++) {
1024            %(readOp)s
1025            %(declDest)s
1026            %(readDest)s
1027            %(op)s
1028            %(writeDest)s
1029        }
1030        ''' % { "readOp" : readOpCode,
1031                "declDest" : declDest,
1032                "readDest" : readDestCode,
1033                "op" : op,
1034                "writeDest" : writeDestCode }
1035        for reg in range(rCount):
1036            eWalkCode += '''
1037            FpDestP%(reg)d.uw = gtoh(destRegs.regs[%(reg)d]);
1038            ''' % { "reg" : reg }
1039        iop = InstObjParams(name, Name,
1040                            "RegRegImmOp",
1041                            { "code": eWalkCode,
1042                              "r_count": rCount,
1043                              "predicate_test": predicateTest,
1044                              "op_class": opClass }, [])
1045        header_output += NeonRegRegImmOpDeclare.subst(iop)
1046        exec_output += NeonEqualRegExecute.subst(iop)
1047        for type in types:
1048            substDict = { "targs" : type,
1049                          "class_name" : Name }
1050            exec_output += NeonExecDeclare.subst(substDict)
1051
1052    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1053        global header_output, exec_output
1054        eWalkCode = simdEnabledCheckCode + '''
1055        BigRegVect srcReg1;
1056        RegVect destReg;
1057        '''
1058        for reg in range(4):
1059            eWalkCode += '''
1060                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1061            ''' % { "reg" : reg }
1062        if readDest:
1063            for reg in range(2):
1064                eWalkCode += '''
1065                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1066                ''' % { "reg" : reg }
1067        readDestCode = ''
1068        if readDest:
1069            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1070        eWalkCode += '''
1071        for (unsigned i = 0; i < eCount; i++) {
1072            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1073            Element destElem;
1074            %(readDest)s
1075            %(op)s
1076            destReg.elements[i] = htog(destElem);
1077        }
1078        ''' % { "op" : op, "readDest" : readDestCode }
1079        for reg in range(2):
1080            eWalkCode += '''
1081            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1082            ''' % { "reg" : reg }
1083        iop = InstObjParams(name, Name,
1084                            "RegRegImmOp",
1085                            { "code": eWalkCode,
1086                              "r_count": 2,
1087                              "predicate_test": predicateTest,
1088                              "op_class": opClass }, [])
1089        header_output += NeonRegRegImmOpDeclare.subst(iop)
1090        exec_output += NeonUnequalRegExecute.subst(iop)
1091        for type in types:
1092            substDict = { "targs" : type,
1093                          "class_name" : Name }
1094            exec_output += NeonExecDeclare.subst(substDict)
1095
1096    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1097        global header_output, exec_output
1098        eWalkCode = simdEnabledCheckCode + '''
1099        RegVect srcReg1;
1100        BigRegVect destReg;
1101        '''
1102        for reg in range(2):
1103            eWalkCode += '''
1104                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1105            ''' % { "reg" : reg }
1106        if readDest:
1107            for reg in range(4):
1108                eWalkCode += '''
1109                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1110                ''' % { "reg" : reg }
1111        readDestCode = ''
1112        if readDest:
1113            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1114        eWalkCode += '''
1115        for (unsigned i = 0; i < eCount; i++) {
1116            Element srcElem1 = gtoh(srcReg1.elements[i]);
1117            BigElement destElem;
1118            %(readDest)s
1119            %(op)s
1120            destReg.elements[i] = htog(destElem);
1121        }
1122        ''' % { "op" : op, "readDest" : readDestCode }
1123        for reg in range(4):
1124            eWalkCode += '''
1125            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1126            ''' % { "reg" : reg }
1127        iop = InstObjParams(name, Name,
1128                            "RegRegImmOp",
1129                            { "code": eWalkCode,
1130                              "r_count": 2,
1131                              "predicate_test": predicateTest,
1132                              "op_class": opClass }, [])
1133        header_output += NeonRegRegImmOpDeclare.subst(iop)
1134        exec_output += NeonUnequalRegExecute.subst(iop)
1135        for type in types:
1136            substDict = { "targs" : type,
1137                          "class_name" : Name }
1138            exec_output += NeonExecDeclare.subst(substDict)
1139
1140    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1141        global header_output, exec_output
1142        eWalkCode = simdEnabledCheckCode + '''
1143        RegVect srcReg1, destReg;
1144        '''
1145        for reg in range(rCount):
1146            eWalkCode += '''
1147                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1148            ''' % { "reg" : reg }
1149            if readDest:
1150                eWalkCode += '''
1151                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1152                ''' % { "reg" : reg }
1153        readDestCode = ''
1154        if readDest:
1155            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1156        eWalkCode += '''
1157        for (unsigned i = 0; i < eCount; i++) {
1158            unsigned j = i;
1159            Element srcElem1 = gtoh(srcReg1.elements[i]);
1160            Element destElem;
1161            %(readDest)s
1162            %(op)s
1163            destReg.elements[j] = htog(destElem);
1164        }
1165        ''' % { "op" : op, "readDest" : readDestCode }
1166        for reg in range(rCount):
1167            eWalkCode += '''
1168            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1169            ''' % { "reg" : reg }
1170        iop = InstObjParams(name, Name,
1171                            "RegRegOp",
1172                            { "code": eWalkCode,
1173                              "r_count": rCount,
1174                              "predicate_test": predicateTest,
1175                              "op_class": opClass }, [])
1176        header_output += NeonRegRegOpDeclare.subst(iop)
1177        exec_output += NeonEqualRegExecute.subst(iop)
1178        for type in types:
1179            substDict = { "targs" : type,
1180                          "class_name" : Name }
1181            exec_output += NeonExecDeclare.subst(substDict)
1182
1183    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1184        global header_output, exec_output
1185        eWalkCode = simdEnabledCheckCode + '''
1186        RegVect srcReg1, destReg;
1187        '''
1188        for reg in range(rCount):
1189            eWalkCode += '''
1190                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1191            ''' % { "reg" : reg }
1192            if readDest:
1193                eWalkCode += '''
1194                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1195                ''' % { "reg" : reg }
1196        readDestCode = ''
1197        if readDest:
1198            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1199        eWalkCode += '''
1200        for (unsigned i = 0; i < eCount; i++) {
1201            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1202            Element destElem;
1203            %(readDest)s
1204            %(op)s
1205            destReg.elements[i] = htog(destElem);
1206        }
1207        ''' % { "op" : op, "readDest" : readDestCode }
1208        for reg in range(rCount):
1209            eWalkCode += '''
1210            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1211            ''' % { "reg" : reg }
1212        iop = InstObjParams(name, Name,
1213                            "RegRegImmOp",
1214                            { "code": eWalkCode,
1215                              "r_count": rCount,
1216                              "predicate_test": predicateTest,
1217                              "op_class": opClass }, [])
1218        header_output += NeonRegRegImmOpDeclare.subst(iop)
1219        exec_output += NeonEqualRegExecute.subst(iop)
1220        for type in types:
1221            substDict = { "targs" : type,
1222                          "class_name" : Name }
1223            exec_output += NeonExecDeclare.subst(substDict)
1224
1225    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1226        global header_output, exec_output
1227        eWalkCode = simdEnabledCheckCode + '''
1228        RegVect srcReg1, destReg;
1229        '''
1230        for reg in range(rCount):
1231            eWalkCode += '''
1232                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1233                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1234            ''' % { "reg" : reg }
1235            if readDest:
1236                eWalkCode += '''
1237                ''' % { "reg" : reg }
1238        readDestCode = ''
1239        if readDest:
1240            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1241        eWalkCode += op
1242        for reg in range(rCount):
1243            eWalkCode += '''
1244            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1245            FpOp1P%(reg)d.uw = gtoh(srcReg1.regs[%(reg)d]);
1246            ''' % { "reg" : reg }
1247        iop = InstObjParams(name, Name,
1248                            "RegRegOp",
1249                            { "code": eWalkCode,
1250                              "r_count": rCount,
1251                              "predicate_test": predicateTest,
1252                              "op_class": opClass }, [])
1253        header_output += NeonRegRegOpDeclare.subst(iop)
1254        exec_output += NeonEqualRegExecute.subst(iop)
1255        for type in types:
1256            substDict = { "targs" : type,
1257                          "class_name" : Name }
1258            exec_output += NeonExecDeclare.subst(substDict)
1259
1260    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1261            readDest=False, toInt=False):
1262        global header_output, exec_output
1263        eWalkCode = simdEnabledCheckCode + '''
1264        typedef FloatReg FloatVect[rCount];
1265        FloatVect srcRegs1;
1266        '''
1267        if toInt:
1268            eWalkCode += 'RegVect destRegs;\n'
1269        else:
1270            eWalkCode += 'FloatVect destRegs;\n'
1271        for reg in range(rCount):
1272            eWalkCode += '''
1273                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1274            ''' % { "reg" : reg }
1275            if readDest:
1276                if toInt:
1277                    eWalkCode += '''
1278                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1279                    ''' % { "reg" : reg }
1280                else:
1281                    eWalkCode += '''
1282                        destRegs[%(reg)d] = FpDestP%(reg)d;
1283                    ''' % { "reg" : reg }
1284        readDestCode = ''
1285        if readDest:
1286            readDestCode = 'destReg = destRegs[i];'
1287        destType = 'FloatReg'
1288        writeDest = 'destRegs[r] = destReg;'
1289        if toInt:
1290            destType = 'FloatRegBits'
1291            writeDest = 'destRegs.regs[r] = destReg;'
1292        eWalkCode += '''
1293        for (unsigned r = 0; r < rCount; r++) {
1294            FloatReg srcReg1 = srcRegs1[r];
1295            %(destType)s destReg;
1296            %(readDest)s
1297            %(op)s
1298            %(writeDest)s
1299        }
1300        ''' % { "op" : op,
1301                "readDest" : readDestCode,
1302                "destType" : destType,
1303                "writeDest" : writeDest }
1304        for reg in range(rCount):
1305            if toInt:
1306                eWalkCode += '''
1307                FpDestP%(reg)d.uw = destRegs.regs[%(reg)d];
1308                ''' % { "reg" : reg }
1309            else:
1310                eWalkCode += '''
1311                FpDestP%(reg)d = destRegs[%(reg)d];
1312                ''' % { "reg" : reg }
1313        iop = InstObjParams(name, Name,
1314                            "FpRegRegOp",
1315                            { "code": eWalkCode,
1316                              "r_count": rCount,
1317                              "predicate_test": predicateTest,
1318                              "op_class": opClass }, [])
1319        header_output += NeonRegRegOpDeclare.subst(iop)
1320        exec_output += NeonEqualRegExecute.subst(iop)
1321        for type in types:
1322            substDict = { "targs" : type,
1323                          "class_name" : Name }
1324            exec_output += NeonExecDeclare.subst(substDict)
1325
1326    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1327        global header_output, exec_output
1328        eWalkCode = simdEnabledCheckCode + '''
1329        RegVect srcRegs;
1330        BigRegVect destReg;
1331        '''
1332        for reg in range(rCount):
1333            eWalkCode += '''
1334                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1335            ''' % { "reg" : reg }
1336            if readDest:
1337                eWalkCode += '''
1338                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1339                ''' % { "reg" : reg }
1340        readDestCode = ''
1341        if readDest:
1342            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1343        eWalkCode += '''
1344        for (unsigned i = 0; i < eCount / 2; i++) {
1345            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1346            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1347            BigElement destElem;
1348            %(readDest)s
1349            %(op)s
1350            destReg.elements[i] = htog(destElem);
1351        }
1352        ''' % { "op" : op, "readDest" : readDestCode }
1353        for reg in range(rCount):
1354            eWalkCode += '''
1355            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1356            ''' % { "reg" : reg }
1357        iop = InstObjParams(name, Name,
1358                            "RegRegOp",
1359                            { "code": eWalkCode,
1360                              "r_count": rCount,
1361                              "predicate_test": predicateTest,
1362                              "op_class": opClass }, [])
1363        header_output += NeonRegRegOpDeclare.subst(iop)
1364        exec_output += NeonUnequalRegExecute.subst(iop)
1365        for type in types:
1366            substDict = { "targs" : type,
1367                          "class_name" : Name }
1368            exec_output += NeonExecDeclare.subst(substDict)
1369
1370    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1371        global header_output, exec_output
1372        eWalkCode = simdEnabledCheckCode + '''
1373        BigRegVect srcReg1;
1374        RegVect destReg;
1375        '''
1376        for reg in range(4):
1377            eWalkCode += '''
1378                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1379            ''' % { "reg" : reg }
1380        if readDest:
1381            for reg in range(2):
1382                eWalkCode += '''
1383                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1384                ''' % { "reg" : reg }
1385        readDestCode = ''
1386        if readDest:
1387            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1388        eWalkCode += '''
1389        for (unsigned i = 0; i < eCount; i++) {
1390            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1391            Element destElem;
1392            %(readDest)s
1393            %(op)s
1394            destReg.elements[i] = htog(destElem);
1395        }
1396        ''' % { "op" : op, "readDest" : readDestCode }
1397        for reg in range(2):
1398            eWalkCode += '''
1399            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1400            ''' % { "reg" : reg }
1401        iop = InstObjParams(name, Name,
1402                            "RegRegOp",
1403                            { "code": eWalkCode,
1404                              "r_count": 2,
1405                              "predicate_test": predicateTest,
1406                              "op_class": opClass }, [])
1407        header_output += NeonRegRegOpDeclare.subst(iop)
1408        exec_output += NeonUnequalRegExecute.subst(iop)
1409        for type in types:
1410            substDict = { "targs" : type,
1411                          "class_name" : Name }
1412            exec_output += NeonExecDeclare.subst(substDict)
1413
1414    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1415        global header_output, exec_output
1416        eWalkCode = simdEnabledCheckCode + '''
1417        RegVect destReg;
1418        '''
1419        if readDest:
1420            for reg in range(rCount):
1421                eWalkCode += '''
1422                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1423                ''' % { "reg" : reg }
1424        readDestCode = ''
1425        if readDest:
1426            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1427        eWalkCode += '''
1428        for (unsigned i = 0; i < eCount; i++) {
1429            Element destElem;
1430            %(readDest)s
1431            %(op)s
1432            destReg.elements[i] = htog(destElem);
1433        }
1434        ''' % { "op" : op, "readDest" : readDestCode }
1435        for reg in range(rCount):
1436            eWalkCode += '''
1437            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1438            ''' % { "reg" : reg }
1439        iop = InstObjParams(name, Name,
1440                            "RegImmOp",
1441                            { "code": eWalkCode,
1442                              "r_count": rCount,
1443                              "predicate_test": predicateTest,
1444                              "op_class": opClass }, [])
1445        header_output += NeonRegImmOpDeclare.subst(iop)
1446        exec_output += NeonEqualRegExecute.subst(iop)
1447        for type in types:
1448            substDict = { "targs" : type,
1449                          "class_name" : Name }
1450            exec_output += NeonExecDeclare.subst(substDict)
1451
1452    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1453        global header_output, exec_output
1454        eWalkCode = simdEnabledCheckCode + '''
1455        RegVect srcReg1;
1456        BigRegVect destReg;
1457        '''
1458        for reg in range(2):
1459            eWalkCode += '''
1460                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
1461            ''' % { "reg" : reg }
1462        if readDest:
1463            for reg in range(4):
1464                eWalkCode += '''
1465                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d.uw);
1466                ''' % { "reg" : reg }
1467        readDestCode = ''
1468        if readDest:
1469            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1470        eWalkCode += '''
1471        for (unsigned i = 0; i < eCount; i++) {
1472            Element srcElem1 = gtoh(srcReg1.elements[i]);
1473            BigElement destElem;
1474            %(readDest)s
1475            %(op)s
1476            destReg.elements[i] = htog(destElem);
1477        }
1478        ''' % { "op" : op, "readDest" : readDestCode }
1479        for reg in range(4):
1480            eWalkCode += '''
1481            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
1482            ''' % { "reg" : reg }
1483        iop = InstObjParams(name, Name,
1484                            "RegRegOp",
1485                            { "code": eWalkCode,
1486                              "r_count": 2,
1487                              "predicate_test": predicateTest,
1488                              "op_class": opClass }, [])
1489        header_output += NeonRegRegOpDeclare.subst(iop)
1490        exec_output += NeonUnequalRegExecute.subst(iop)
1491        for type in types:
1492            substDict = { "targs" : type,
1493                          "class_name" : Name }
1494            exec_output += NeonExecDeclare.subst(substDict)
1495
    # Per-element snippets and their instruction registrations. Each snippet
    # is registered twice through threeEqualRegInst: a "...D" class passing 2
    # and a "...Q" class passing 4 as the fifth argument.
    # NOTE(review): threeEqualRegInst is defined above this section; the
    # fifth argument is presumably its register count and a trailing True
    # presumably its readDest flag. Confirm against that definition.

    # vhadd: halves each input separately (low bit masked off first) and adds
    # the halves, then adds the carry produced by the two discarded low bits.
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # vrhadd: same as vhadd except that 1 is added to the sum of the low bits
    # before it is shifted down, so the carry is set when either low bit is.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # vhsub: subtracts the halved inputs, then subtracts the bit derived from
    # the difference of the two low bits (named "barrowBit" in the snippet).
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)

    # vand: bitwise AND of the two sources.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    # vbic: first source AND the complement of the second.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    # vorr: bitwise OR of the two sources.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov is registered with the vorr snippet under the SimdMiscOp class.
    # NOTE(review): presumably this relies on both source operands naming
    # the same register; confirm with the decoder.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    # vorn: first source OR the complement of the second.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    # veor: bitwise exclusive OR of the two sources.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # The three snippets below use destElem as an input as well as the
    # output, and are registered with a trailing True.

    # vbif: keeps destination bits where srcElem2 is 1 and takes srcElem1
    # bits where srcElem2 is 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # vbit: takes srcElem1 bits where srcElem2 is 1 and keeps destination
    # bits where srcElem2 is 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destination acts as the selector: srcElem1 bits where it
    # is 1, srcElem2 bits where it is 0.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1582
1583    vmaxCode = '''
1584        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1585    '''
1586    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1587    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1588
1589    vminCode = '''
1590        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1591    '''
1592    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1593    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1594
1595    vaddCode = '''
1596        destElem = srcElem1 + srcElem2;
1597    '''
1598    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1599    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1600
1601    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1602                      2, vaddCode, pairwise=True)
1603    threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1604                      4, vaddCode, pairwise=True)
1605    vaddlwCode = '''
1606        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1607    '''
1608    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1609    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1610    vaddhnCode = '''
1611        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1612                   (sizeof(Element) * 8);
1613    '''
1614    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1615    vraddhnCode = '''
1616        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1617                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1618                   (sizeof(Element) * 8);
1619    '''
1620    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1621
1622    vsubCode = '''
1623        destElem = srcElem1 - srcElem2;
1624    '''
1625    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1626    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1627    vsublwCode = '''
1628        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1629    '''
1630    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1631    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1632
1633    vqaddUCode = '''
1634        destElem = srcElem1 + srcElem2;
1635        FPSCR fpscr = (FPSCR)Fpscr;
1636        if (destElem < srcElem1 || destElem < srcElem2) {
1637            destElem = (Element)(-1);
1638            fpscr.qc = 1;
1639        }
1640        Fpscr = fpscr;
1641    '''
1642    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1643    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1644    vsubhnCode = '''
1645        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1646                   (sizeof(Element) * 8);
1647    '''
1648    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1649    vrsubhnCode = '''
1650        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1651                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1652                   (sizeof(Element) * 8);
1653    '''
1654    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1655
1656    vqaddSCode = '''
1657        destElem = srcElem1 + srcElem2;
1658        FPSCR fpscr = (FPSCR)Fpscr;
1659        bool negDest = (destElem < 0);
1660        bool negSrc1 = (srcElem1 < 0);
1661        bool negSrc2 = (srcElem2 < 0);
1662        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1663            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1664            if (negDest)
1665                destElem -= 1;
1666            fpscr.qc = 1;
1667        }
1668        Fpscr = fpscr;
1669    '''
1670    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1671    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1672
1673    vqsubUCode = '''
1674        destElem = srcElem1 - srcElem2;
1675        FPSCR fpscr = (FPSCR)Fpscr;
1676        if (destElem > srcElem1) {
1677            destElem = 0;
1678            fpscr.qc = 1;
1679        }
1680        Fpscr = fpscr;
1681    '''
1682    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1683    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1684
1685    vqsubSCode = '''
1686        destElem = srcElem1 - srcElem2;
1687        FPSCR fpscr = (FPSCR)Fpscr;
1688        bool negDest = (destElem < 0);
1689        bool negSrc1 = (srcElem1 < 0);
1690        bool posSrc2 = (srcElem2 >= 0);
1691        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1692            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1693            if (negDest)
1694                destElem -= 1;
1695            fpscr.qc = 1;
1696        }
1697        Fpscr = fpscr;
1698    '''
1699    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1700    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1701
1702    vcgtCode = '''
1703        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1704    '''
1705    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1706    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1707
1708    vcgeCode = '''
1709        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1710    '''
1711    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1712    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1713
1714    vceqCode = '''
1715        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1716    '''
1717    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1718    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1719
1720    vshlCode = '''
1721        int16_t shiftAmt = (int8_t)srcElem2;
1722        if (shiftAmt < 0) {
1723            shiftAmt = -shiftAmt;
1724            if (shiftAmt >= sizeof(Element) * 8) {
1725                shiftAmt = sizeof(Element) * 8 - 1;
1726                destElem = 0;
1727            } else {
1728                destElem = (srcElem1 >> shiftAmt);
1729            }
1730            // Make sure the right shift sign extended when it should.
1731            if (ltz(srcElem1) && !ltz(destElem)) {
1732                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1733                                             1 - shiftAmt));
1734            }
1735        } else {
1736            if (shiftAmt >= sizeof(Element) * 8) {
1737                destElem = 0;
1738            } else {
1739                destElem = srcElem1 << shiftAmt;
1740            }
1741        }
1742    '''
1743    threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
1744    threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)
1745
1746    vrshlCode = '''
1747        int16_t shiftAmt = (int8_t)srcElem2;
1748        if (shiftAmt < 0) {
1749            shiftAmt = -shiftAmt;
1750            Element rBit = 0;
1751            if (shiftAmt <= sizeof(Element) * 8)
1752                rBit = bits(srcElem1, shiftAmt - 1);
1753            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1754                rBit = 1;
1755            if (shiftAmt >= sizeof(Element) * 8) {
1756                shiftAmt = sizeof(Element) * 8 - 1;
1757                destElem = 0;
1758            } else {
1759                destElem = (srcElem1 >> shiftAmt);
1760            }
1761            // Make sure the right shift sign extended when it should.
1762            if (ltz(srcElem1) && !ltz(destElem)) {
1763                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1764                                             1 - shiftAmt));
1765            }
1766            destElem += rBit;
1767        } else if (shiftAmt > 0) {
1768            if (shiftAmt >= sizeof(Element) * 8) {
1769                destElem = 0;
1770            } else {
1771                destElem = srcElem1 << shiftAmt;
1772            }
1773        } else {
1774            destElem = srcElem1;
1775        }
1776    '''
1777    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1778    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1779
1780    vqshlUCode = '''
1781        int16_t shiftAmt = (int8_t)srcElem2;
1782        FPSCR fpscr = (FPSCR)Fpscr;
1783        if (shiftAmt < 0) {
1784            shiftAmt = -shiftAmt;
1785            if (shiftAmt >= sizeof(Element) * 8) {
1786                shiftAmt = sizeof(Element) * 8 - 1;
1787                destElem = 0;
1788            } else {
1789                destElem = (srcElem1 >> shiftAmt);
1790            }
1791        } else if (shiftAmt > 0) {
1792            if (shiftAmt >= sizeof(Element) * 8) {
1793                if (srcElem1 != 0) {
1794                    destElem = mask(sizeof(Element) * 8);
1795                    fpscr.qc = 1;
1796                } else {
1797                    destElem = 0;
1798                }
1799            } else {
1800                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1801                            sizeof(Element) * 8 - shiftAmt)) {
1802                    destElem = mask(sizeof(Element) * 8);
1803                    fpscr.qc = 1;
1804                } else {
1805                    destElem = srcElem1 << shiftAmt;
1806                }
1807            }
1808        } else {
1809            destElem = srcElem1;
1810        }
1811        Fpscr = fpscr;
1812    '''
1813    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1814    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1815
1816    vqshlSCode = '''
1817        int16_t shiftAmt = (int8_t)srcElem2;
1818        FPSCR fpscr = (FPSCR)Fpscr;
1819        if (shiftAmt < 0) {
1820            shiftAmt = -shiftAmt;
1821            if (shiftAmt >= sizeof(Element) * 8) {
1822                shiftAmt = sizeof(Element) * 8 - 1;
1823                destElem = 0;
1824            } else {
1825                destElem = (srcElem1 >> shiftAmt);
1826            }
1827            // Make sure the right shift sign extended when it should.
1828            if (srcElem1 < 0 && destElem >= 0) {
1829                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1830                                             1 - shiftAmt));
1831            }
1832        } else if (shiftAmt > 0) {
1833            bool sat = false;
1834            if (shiftAmt >= sizeof(Element) * 8) {
1835                if (srcElem1 != 0)
1836                    sat = true;
1837                else
1838                    destElem = 0;
1839            } else {
1840                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1841                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1842                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1843                    sat = true;
1844                } else {
1845                    destElem = srcElem1 << shiftAmt;
1846                }
1847            }
1848            if (sat) {
1849                fpscr.qc = 1;
1850                destElem = mask(sizeof(Element) * 8 - 1);
1851                if (srcElem1 < 0)
1852                    destElem = ~destElem;
1853            }
1854        } else {
1855            destElem = srcElem1;
1856        }
1857        Fpscr = fpscr;
1858    '''
1859    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1860    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1861
1862    vqrshlUCode = '''
1863        int16_t shiftAmt = (int8_t)srcElem2;
1864        FPSCR fpscr = (FPSCR)Fpscr;
1865        if (shiftAmt < 0) {
1866            shiftAmt = -shiftAmt;
1867            Element rBit = 0;
1868            if (shiftAmt <= sizeof(Element) * 8)
1869                rBit = bits(srcElem1, shiftAmt - 1);
1870            if (shiftAmt >= sizeof(Element) * 8) {
1871                shiftAmt = sizeof(Element) * 8 - 1;
1872                destElem = 0;
1873            } else {
1874                destElem = (srcElem1 >> shiftAmt);
1875            }
1876            destElem += rBit;
1877        } else {
1878            if (shiftAmt >= sizeof(Element) * 8) {
1879                if (srcElem1 != 0) {
1880                    destElem = mask(sizeof(Element) * 8);
1881                    fpscr.qc = 1;
1882                } else {
1883                    destElem = 0;
1884                }
1885            } else {
1886                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1887                            sizeof(Element) * 8 - shiftAmt)) {
1888                    destElem = mask(sizeof(Element) * 8);
1889                    fpscr.qc = 1;
1890                } else {
1891                    destElem = srcElem1 << shiftAmt;
1892                }
1893            }
1894        }
1895        Fpscr = fpscr;
1896    '''
1897    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1898    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1899
1900    vqrshlSCode = '''
1901        int16_t shiftAmt = (int8_t)srcElem2;
1902        FPSCR fpscr = (FPSCR)Fpscr;
1903        if (shiftAmt < 0) {
1904            shiftAmt = -shiftAmt;
1905            Element rBit = 0;
1906            if (shiftAmt <= sizeof(Element) * 8)
1907                rBit = bits(srcElem1, shiftAmt - 1);
1908            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1909                rBit = 1;
1910            if (shiftAmt >= sizeof(Element) * 8) {
1911                shiftAmt = sizeof(Element) * 8 - 1;
1912                destElem = 0;
1913            } else {
1914                destElem = (srcElem1 >> shiftAmt);
1915            }
1916            // Make sure the right shift sign extended when it should.
1917            if (srcElem1 < 0 && destElem >= 0) {
1918                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1919                                             1 - shiftAmt));
1920            }
1921            destElem += rBit;
1922        } else if (shiftAmt > 0) {
1923            bool sat = false;
1924            if (shiftAmt >= sizeof(Element) * 8) {
1925                if (srcElem1 != 0)
1926                    sat = true;
1927                else
1928                    destElem = 0;
1929            } else {
1930                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1931                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1932                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1933                    sat = true;
1934                } else {
1935                    destElem = srcElem1 << shiftAmt;
1936                }
1937            }
1938            if (sat) {
1939                fpscr.qc = 1;
1940                destElem = mask(sizeof(Element) * 8 - 1);
1941                if (srcElem1 < 0)
1942                    destElem = ~destElem;
1943            }
1944        } else {
1945            destElem = srcElem1;
1946        }
1947        Fpscr = fpscr;
1948    '''
1949    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1950    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1951
1952    vabaCode = '''
1953        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1954                                            (srcElem2 - srcElem1);
1955    '''
1956    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1957    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1958    vabalCode = '''
1959        destElem += (srcElem1 > srcElem2) ?
1960            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1961            ((BigElement)srcElem2 - (BigElement)srcElem1);
1962    '''
1963    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1964
1965    vabdCode = '''
1966        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1967                                           (srcElem2 - srcElem1);
1968    '''
1969    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1970    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1971    vabdlCode = '''
1972        destElem = (srcElem1 > srcElem2) ?
1973            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1974            ((BigElement)srcElem2 - (BigElement)srcElem1);
1975    '''
1976    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1977
1978    vtstCode = '''
1979        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1980    '''
1981    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1982    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1983
1984    vmulCode = '''
1985        destElem = srcElem1 * srcElem2;
1986    '''
1987    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
1988    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
1989    vmullCode = '''
1990        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
1991    '''
1992    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
1993
1994    vmlaCode = '''
1995        destElem = destElem + srcElem1 * srcElem2;
1996    '''
1997    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
1998    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
1999    vmlalCode = '''
2000        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2001    '''
2002    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2003
2004    vqdmlalCode = '''
2005        FPSCR fpscr = (FPSCR)Fpscr;
2006        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2007        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2008        Element halfNeg = maxNeg / 2;
2009        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2010            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2011            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2012            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2013            fpscr.qc = 1;
2014        }
2015        bool negPreDest = ltz(destElem);
2016        destElem += midElem;
2017        bool negDest = ltz(destElem);
2018        bool negMid = ltz(midElem);
2019        if (negPreDest == negMid && negMid != negDest) {
2020            destElem = mask(sizeof(BigElement) * 8 - 1);
2021            if (negPreDest)
2022                destElem = ~destElem;
2023            fpscr.qc = 1;
2024        }
2025        Fpscr = fpscr;
2026    '''
2027    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2028
2029    vqdmlslCode = '''
2030        FPSCR fpscr = (FPSCR)Fpscr;
2031        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2032        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2033        Element halfNeg = maxNeg / 2;
2034        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2035            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2036            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2037            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2038            fpscr.qc = 1;
2039        }
2040        bool negPreDest = ltz(destElem);
2041        destElem -= midElem;
2042        bool negDest = ltz(destElem);
2043        bool posMid = ltz((BigElement)-midElem);
2044        if (negPreDest == posMid && posMid != negDest) {
2045            destElem = mask(sizeof(BigElement) * 8 - 1);
2046            if (negPreDest)
2047                destElem = ~destElem;
2048            fpscr.qc = 1;
2049        }
2050        Fpscr = fpscr;
2051    '''
2052    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2053
2054    vqdmullCode = '''
2055        FPSCR fpscr = (FPSCR)Fpscr;
2056        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2057        if (srcElem1 == srcElem2 &&
2058                srcElem1 == (Element)((Element)1 <<
2059                    (Element)(sizeof(Element) * 8 - 1))) {
2060            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2061            fpscr.qc = 1;
2062        }
2063        Fpscr = fpscr;
2064    '''
2065    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2066
2067    vmlsCode = '''
2068        destElem = destElem - srcElem1 * srcElem2;
2069    '''
2070    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2071    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2072    vmlslCode = '''
2073        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2074    '''
2075    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2076
2077    vmulpCode = '''
2078        destElem = 0;
2079        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2080            if (bits(srcElem2, j))
2081                destElem ^= srcElem1 << j;
2082        }
2083    '''
2084    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2085    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2086    vmullpCode = '''
2087        destElem = 0;
2088        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2089            if (bits(srcElem2, j))
2090                destElem ^= (BigElement)srcElem1 << j;
2091        }
2092    '''
2093    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2094
2095    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2096    threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2097
2098    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2099    threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2100
2101    vqdmulhCode = '''
2102        FPSCR fpscr = (FPSCR)Fpscr;
2103        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2104                   (sizeof(Element) * 8);
2105        if (srcElem1 == srcElem2 &&
2106                srcElem1 == (Element)((Element)1 <<
2107                    (sizeof(Element) * 8 - 1))) {
2108            destElem = ~srcElem1;
2109            fpscr.qc = 1;
2110        }
2111        Fpscr = fpscr;
2112    '''
2113    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2114    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2115
2116    vqrdmulhCode = '''
2117        FPSCR fpscr = (FPSCR)Fpscr;
2118        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2119                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2120                   (sizeof(Element) * 8);
2121        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2122        Element halfNeg = maxNeg / 2;
2123        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2124            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2125            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2126            if (destElem < 0) {
2127                destElem = mask(sizeof(Element) * 8 - 1);
2128            } else {
2129                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2130            }
2131            fpscr.qc = 1;
2132        }
2133        Fpscr = fpscr;
2134    '''
2135    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2136            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2137    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2138            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2139
2140    vmaxfpCode = '''
2141        FPSCR fpscr = (FPSCR)Fpscr;
2142        bool done;
2143        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2144        if (!done) {
2145            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2146                               true, true, VfpRoundNearest);
2147        } else if (flushToZero(srcReg1, srcReg2)) {
2148            fpscr.idc = 1;
2149        }
2150        Fpscr = fpscr;
2151    '''
2152    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2153    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2154
2155    vminfpCode = '''
2156        FPSCR fpscr = (FPSCR)Fpscr;
2157        bool done;
2158        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2159        if (!done) {
2160            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2161                               true, true, VfpRoundNearest);
2162        } else if (flushToZero(srcReg1, srcReg2)) {
2163            fpscr.idc = 1;
2164        }
2165        Fpscr = fpscr;
2166    '''
2167    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2168    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2169
2170    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2171                        2, vmaxfpCode, pairwise=True)
2172    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2173                        4, vmaxfpCode, pairwise=True)
2174
2175    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2176                        2, vminfpCode, pairwise=True)
2177    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2178                        4, vminfpCode, pairwise=True)
2179
2180    vaddfpCode = '''
2181        FPSCR fpscr = Fpscr;
2182        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2183                           true, true, VfpRoundNearest);
2184        Fpscr = fpscr;
2185    '''
2186    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2187    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2188
2189    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2190                        2, vaddfpCode, pairwise=True)
2191    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2192                        4, vaddfpCode, pairwise=True)
2193
2194    vsubfpCode = '''
2195        FPSCR fpscr = Fpscr;
2196        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2197                           true, true, VfpRoundNearest);
2198        Fpscr = fpscr;
2199    '''
2200    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2201    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2202
2203    vmulfpCode = '''
2204        FPSCR fpscr = Fpscr;
2205        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2206                           true, true, VfpRoundNearest);
2207        Fpscr = fpscr;
2208    '''
2209    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2210    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2211
2212    vmlafpCode = '''
2213        FPSCR fpscr = Fpscr;
2214        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2215                             true, true, VfpRoundNearest);
2216        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2217                           true, true, VfpRoundNearest);
2218        Fpscr = fpscr;
2219    '''
2220    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2221    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2222
2223    vmlsfpCode = '''
2224        FPSCR fpscr = Fpscr;
2225        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2226                             true, true, VfpRoundNearest);
2227        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2228                           true, true, VfpRoundNearest);
2229        Fpscr = fpscr;
2230    '''
2231    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2232    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2233
2234    vcgtfpCode = '''
2235        FPSCR fpscr = (FPSCR)Fpscr;
2236        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2237                             true, true, VfpRoundNearest);
2238        destReg = (res == 0) ? -1 : 0;
2239        if (res == 2.0)
2240            fpscr.ioc = 1;
2241        Fpscr = fpscr;
2242    '''
2243    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2244            2, vcgtfpCode, toInt = True)
2245    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2246            4, vcgtfpCode, toInt = True)
2247
2248    vcgefpCode = '''
2249        FPSCR fpscr = (FPSCR)Fpscr;
2250        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2251                             true, true, VfpRoundNearest);
2252        destReg = (res == 0) ? -1 : 0;
2253        if (res == 2.0)
2254            fpscr.ioc = 1;
2255        Fpscr = fpscr;
2256    '''
2257    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2258            2, vcgefpCode, toInt = True)
2259    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2260            4, vcgefpCode, toInt = True)
2261
# Floating-point VACGT. Uses vacgtFunc through binaryOp; result 0 -> all-ones
# destination (-1), otherwise 0; result 2.0 also sets fpscr.ioc. Any
# absolute-value handling implied by the mnemonic lives in vacgtFunc, which
# is not visible in this section.
2262    vacgtfpCode = '''
2263        FPSCR fpscr = (FPSCR)Fpscr;
2264        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2265                             true, true, VfpRoundNearest);
2266        destReg = (res == 0) ? -1 : 0;
2267        if (res == 2.0)
2268            fpscr.ioc = 1;
2269        Fpscr = fpscr;
2270    '''
2271    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2272            2, vacgtfpCode, toInt = True)
2273    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2274            4, vacgtfpCode, toInt = True)
2275
# Floating-point VACGE. Uses vacgeFunc through binaryOp; result 0 -> all-ones
# destination (-1), otherwise 0; result 2.0 also sets fpscr.ioc. The local
# FPSCR copy is written back to Fpscr at the end.
2276    vacgefpCode = '''
2277        FPSCR fpscr = (FPSCR)Fpscr;
2278        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2279                             true, true, VfpRoundNearest);
2280        destReg = (res == 0) ? -1 : 0;
2281        if (res == 2.0)
2282            fpscr.ioc = 1;
2283        Fpscr = fpscr;
2284    '''
2285    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2286            2, vacgefpCode, toInt = True)
2287    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2288            4, vacgefpCode, toInt = True)
2289
# Floating-point VCEQ, three-register form. Uses vceqFunc through binaryOp;
# result 0 -> all-ones destination (-1), otherwise 0; result 2.0 also sets
# fpscr.ioc. NOTE: the name vceqfpCode is rebound further down in this file
# for the compare-with-zero form; these two instantiations use this value.
2290    vceqfpCode = '''
2291        FPSCR fpscr = (FPSCR)Fpscr;
2292        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2293                             true, true, VfpRoundNearest);
2294        destReg = (res == 0) ? -1 : 0;
2295        if (res == 2.0)
2296            fpscr.ioc = 1;
2297        Fpscr = fpscr;
2298    '''
2299    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2300            2, vceqfpCode, toInt = True)
2301    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2302            4, vceqfpCode, toInt = True)
2303
# VRECPS: the destination is the result of binaryOp applied to srcReg1 and
# srcReg2 with fpRecpsS (the step function itself is defined elsewhere).
# FPSCR is copied into a local, passed to binaryOp and written back.
2304    vrecpsCode = '''
2305        FPSCR fpscr = Fpscr;
2306        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2307                           true, true, VfpRoundNearest);
2308        Fpscr = fpscr;
2309    '''
2310    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2311    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2312
# VRSQRTS: the destination is the result of binaryOp applied to srcReg1 and
# srcReg2 with fpRSqrtsS (defined elsewhere). FPSCR is copied into a local,
# passed to binaryOp and written back.
2313    vrsqrtsCode = '''
2314        FPSCR fpscr = Fpscr;
2315        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2316                           true, true, VfpRoundNearest);
2317        Fpscr = fpscr;
2318    '''
2319    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2320    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2321
# Floating-point VABD: the difference of srcReg1 and srcReg2 is computed via
# binaryOp/fpSubS, and the destination receives fabs() of that difference.
# FPSCR is copied into a local, passed to binaryOp and written back.
2322    vabdfpCode = '''
2323        FPSCR fpscr = Fpscr;
2324        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2325                             true, true, VfpRoundNearest);
2326        destReg = fabs(mid);
2327        Fpscr = fpscr;
2328    '''
2329    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2330    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2331
# Multiply-family instantiations that go through the twoEqualRegInst,
# twoEqualRegInstFp and twoRegLongInst helpers. They reuse code strings
# (vmlaCode, vmlafpCode, vmlalCode, vmlsCode, vmlslCode, vmulCode, vmulfpCode,
# vmullCode, vqdmull/vqdmlal/vqdmlsl/vqdmulh/vqrdmulh) that are assigned
# earlier in the file; vmlsfpCode is the one assigned just above.
# The class names carry an extra "s" (presumably the by-scalar encodings --
# confirm against the helper definitions and the decoder).
# NOTE(review): the integer vmla variants are instantiated for unsignedTypes
# while vmls and vmul use allTypes -- confirm that this is intentional.
2332    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2333    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2334    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2335    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2336    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2337
2338    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2339    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2340    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2341    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2342    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2343
2344    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2345    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2346    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2347    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2348    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2349
2350    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2351    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2352    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2353    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2354    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2355    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2356            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2357    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2358            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2359
# VSHR (shift right by immediate). When imm is at least the bit width of the
# source element the shift is not performed: the result is -1 if
# ltz(srcElem1) holds and 0 otherwise. For smaller imm the result is
# srcElem1 >> imm. Instantiated for allTypes with size arguments 2 and 4.
2360    vshrCode = '''
2361        if (imm >= sizeof(srcElem1) * 8) {
2362            if (ltz(srcElem1))
2363                destElem = -1;
2364            else
2365                destElem = 0;
2366        } else {
2367            destElem = srcElem1 >> imm;
2368        }
2369    '''
2370    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2371    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2372
# VSRA (shift right and accumulate). The shifted value "mid" is computed as
# in vshr (-1 or 0 when imm covers the whole element). In the in-range case,
# if the source is negative per ltz() but the shifted value is not, the bits
# above position (width - 1 - imm) are filled in by OR-ing with the negated
# top surviving bit. mid is then added to destElem.
# NOTE(review): "Element mid;;" has a stray second semicolon (an empty
# statement in the generated C++); harmless, left untouched here.
2373    vsraCode = '''
2374        Element mid;;
2375        if (imm >= sizeof(srcElem1) * 8) {
2376            mid = ltz(srcElem1) ? -1 : 0;
2377        } else {
2378            mid = srcElem1 >> imm;
2379            if (ltz(srcElem1) && !ltz(mid)) {
2380                mid |= -(mid & ((Element)1 <<
2381                            (sizeof(Element) * 8 - 1 - imm)));
2382            }
2383        }
2384        destElem += mid;
2385    '''
2386    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2387    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2388
# VRSHR (rounding shift right). imm larger than the element width yields 0.
# For non-zero imm the bit at position imm - 1 is taken as the rounding bit
# and added to the shifted value; the shift is written as (imm - 1) followed
# by 1 (presumably so that a shift by the full element width stays
# well-defined in C++). imm == 0 copies the source unchanged.
2389    vrshrCode = '''
2390        if (imm > sizeof(srcElem1) * 8) {
2391            destElem = 0;
2392        } else if (imm) {
2393            Element rBit = bits(srcElem1, imm - 1);
2394            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2395        } else {
2396            destElem = srcElem1;
2397        }
2398    '''
2399    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2400    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2401
# VRSRA (rounding shift right and accumulate). Same three cases as the vrshr
# snippet, but each case adds its value to destElem instead of assigning:
# nothing is added when imm exceeds the element width, the rounded shifted
# value is added for non-zero imm, and the raw source is added for imm == 0.
2402    vrsraCode = '''
2403        if (imm > sizeof(srcElem1) * 8) {
2404            destElem += 0;
2405        } else if (imm) {
2406            Element rBit = bits(srcElem1, imm - 1);
2407            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2408        } else {
2409            destElem += srcElem1;
2410        }
2411    '''
2412    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2413    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2414
# VSRI (shift right and insert). When imm covers the whole element the
# destination is left as it is. Otherwise the shifted source is OR-ed with
# the destination bits selected by ~mask(width - imm) (assuming mask(n)
# yields the n low-order bits, this keeps the top imm bits of destElem).
2415    vsriCode = '''
2416        if (imm >= sizeof(Element) * 8)
2417            destElem = destElem;
2418        else
2419            destElem = (srcElem1 >> imm) |
2420                (destElem & ~mask(sizeof(Element) * 8 - imm));
2421    '''
2422    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2423    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2424
# VSHL (shift left by immediate). For imm at or beyond the element width the
# shift is split into (width - 1) followed by 1; otherwise the source is
# shifted left by imm directly.
2425    vshlCode = '''
2426        if (imm >= sizeof(Element) * 8)
2427            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2428        else
2429            destElem = srcElem1 << imm;
2430    '''
2431    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2432    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2433
# VSLI (shift left and insert). When imm covers the whole element the
# destination is left as it is. Otherwise the source shifted left by imm is
# OR-ed with destElem & mask(imm), i.e. the destination bits selected by
# mask(imm) are preserved.
2434    vsliCode = '''
2435        if (imm >= sizeof(Element) * 8)
2436            destElem = destElem;
2437        else
2438            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2439    '''
2440    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2441    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2442
# VQSHL by immediate, instantiated for signedTypes.
#  - imm >= element width: a non-zero source saturates. destElem is set to
#    1 << (width - 1) and inverted when the source is positive; fpscr.qc is
#    set. A zero source gives 0.
#  - 0 < imm < width: the source is shifted, then the bits from
#    (width - 1) down to (width - 1 - imm) of the source are inspected; if
#    they are neither all zero nor equal to mask(imm + 1), the result
#    saturates as above and qc is set.
#  - imm == 0: the source is copied.
2443    vqshlCode = '''
2444        FPSCR fpscr = (FPSCR)Fpscr;
2445        if (imm >= sizeof(Element) * 8) {
2446            if (srcElem1 != 0) {
2447                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2448                if (srcElem1 > 0)
2449                    destElem = ~destElem;
2450                fpscr.qc = 1;
2451            } else {
2452                destElem = 0;
2453            }
2454        } else if (imm) {
2455            destElem = (srcElem1 << imm);
2456            uint64_t topBits = bits((uint64_t)srcElem1,
2457                                    sizeof(Element) * 8 - 1,
2458                                    sizeof(Element) * 8 - 1 - imm);
2459            if (topBits != 0 && topBits != mask(imm + 1)) {
2460                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2461                if (srcElem1 > 0)
2462                    destElem = ~destElem;
2463                fpscr.qc = 1;
2464            }
2465        } else {
2466            destElem = srcElem1;
2467        }
2468        Fpscr = fpscr;
2469    '''
2470    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2471    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2472
# Saturating shift left by immediate, instantiated for unsignedTypes under
# the mnemonic "vqshlu".
#  - imm >= element width: a non-zero source saturates to
#    mask(sizeof(Element) * 8) and sets fpscr.qc; zero stays zero.
#  - 0 < imm < width: the source is shifted; if any of the source bits from
#    (width - 1) down to (width - imm) is set, the result saturates to
#    mask(sizeof(Element) * 8) and qc is set.
#  - imm == 0: the source is copied.
2473    vqshluCode = '''
2474        FPSCR fpscr = (FPSCR)Fpscr;
2475        if (imm >= sizeof(Element) * 8) {
2476            if (srcElem1 != 0) {
2477                destElem = mask(sizeof(Element) * 8);
2478                fpscr.qc = 1;
2479            } else {
2480                destElem = 0;
2481            }
2482        } else if (imm) {
2483            destElem = (srcElem1 << imm);
2484            uint64_t topBits = bits((uint64_t)srcElem1,
2485                                    sizeof(Element) * 8 - 1,
2486                                    sizeof(Element) * 8 - imm);
2487            if (topBits != 0) {
2488                destElem = mask(sizeof(Element) * 8);
2489                fpscr.qc = 1;
2490            }
2491        } else {
2492            destElem = srcElem1;
2493        }
2494        Fpscr = fpscr;
2495    '''
2496    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2497    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2498
# Saturating shift left by immediate, instantiated for signedTypes under the
# mnemonic "vqshlus". In every branch a negative source produces 0 and sets
# fpscr.qc.
#  - imm >= element width: a positive source saturates to
#    mask(sizeof(Element) * 8) with qc set; zero stays zero.
#  - 0 < imm < width: the source is shifted; a non-negative source with any
#    of bits (width - 1)..(width - imm) set saturates to
#    mask(sizeof(Element) * 8) with qc set.
#  - imm == 0: a non-negative source is copied.
2499    vqshlusCode = '''
2500        FPSCR fpscr = (FPSCR)Fpscr;
2501        if (imm >= sizeof(Element) * 8) {
2502            if (srcElem1 < 0) {
2503                destElem = 0;
2504                fpscr.qc = 1;
2505            } else if (srcElem1 > 0) {
2506                destElem = mask(sizeof(Element) * 8);
2507                fpscr.qc = 1;
2508            } else {
2509                destElem = 0;
2510            }
2511        } else if (imm) {
2512            destElem = (srcElem1 << imm);
2513            uint64_t topBits = bits((uint64_t)srcElem1,
2514                                    sizeof(Element) * 8 - 1,
2515                                    sizeof(Element) * 8 - imm);
2516            if (srcElem1 < 0) {
2517                destElem = 0;
2518                fpscr.qc = 1;
2519            } else if (topBits != 0) {
2520                destElem = mask(sizeof(Element) * 8);
2521                fpscr.qc = 1;
2522            }
2523        } else {
2524            if (srcElem1 < 0) {
2525                fpscr.qc = 1;
2526                destElem = 0;
2527            } else {
2528                destElem = srcElem1;
2529            }
2530        }
2531        Fpscr = fpscr;
2532    '''
2533    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2534    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2535
# VSHRN (shift right and narrow), instantiated through
# twoRegNarrowShiftInst for smallUnsignedTypes. imm at or beyond the source
# element width yields 0; otherwise the result is srcElem1 >> imm (the
# narrowing itself happens on assignment to destElem).
2536    vshrnCode = '''
2537        if (imm >= sizeof(srcElem1) * 8) {
2538            destElem = 0;
2539        } else {
2540            destElem = srcElem1 >> imm;
2541        }
2542    '''
2543    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2544
# VRSHRN (rounding shift right and narrow). imm beyond the source element
# width yields 0; for non-zero imm the bit at position imm - 1 is added as a
# rounding bit to the value shifted by (imm - 1) and then by 1; imm == 0
# copies the source.
2545    vrshrnCode = '''
2546        if (imm > sizeof(srcElem1) * 8) {
2547            destElem = 0;
2548        } else if (imm) {
2549            Element rBit = bits(srcElem1, imm - 1);
2550            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2551        } else {
2552            destElem = srcElem1;
2553        }
2554    '''
2555    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2556
# VQSHRN (saturating shift right and narrow), instantiated for
# smallSignedTypes.
#  - imm beyond the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - non-zero imm: the source is shifted in two steps into a BigElement
#    "mid", and the upper bits are filled by OR-ing with the negated bit at
#    position (BigElement width - 1 - imm). If mid does not survive a round
#    trip through Element, the result saturates to
#    mask(sizeof(Element) * 8 - 1), inverted for a negative source, and qc
#    is set.
#  - imm == 0: the source is copied without a range check.
2557    vqshrnCode = '''
2558        FPSCR fpscr = (FPSCR)Fpscr;
2559        if (imm > sizeof(srcElem1) * 8) {
2560            if (srcElem1 != 0 && srcElem1 != -1)
2561                fpscr.qc = 1;
2562            destElem = 0;
2563        } else if (imm) {
2564            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2565            mid |= -(mid & ((BigElement)1 <<
2566                        (sizeof(BigElement) * 8 - 1 - imm)));
2567            if (mid != (Element)mid) {
2568                destElem = mask(sizeof(Element) * 8 - 1);
2569                if (srcElem1 < 0)
2570                    destElem = ~destElem;
2571                fpscr.qc = 1;
2572            } else {
2573                destElem = mid;
2574            }
2575        } else {
2576            destElem = srcElem1;
2577        }
2578        Fpscr = fpscr;
2579    '''
2580    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2581
# Saturating shift right and narrow, instantiated for smallUnsignedTypes
# under the mnemonic "vqshrun".
#  - imm beyond the source width: result 0; fpscr.qc is set for a non-zero
#    source.
#  - non-zero imm: the shifted value is held in a BigElement; if it does not
#    survive a round trip through Element the result saturates to
#    mask(sizeof(Element) * 8) and qc is set.
#  - imm == 0: the source is copied without a range check.
2582    vqshrunCode = '''
2583        FPSCR fpscr = (FPSCR)Fpscr;
2584        if (imm > sizeof(srcElem1) * 8) {
2585            if (srcElem1 != 0)
2586                fpscr.qc = 1;
2587            destElem = 0;
2588        } else if (imm) {
2589            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2590            if (mid != (Element)mid) {
2591                destElem = mask(sizeof(Element) * 8);
2592                fpscr.qc = 1;
2593            } else {
2594                destElem = mid;
2595            }
2596        } else {
2597            destElem = srcElem1;
2598        }
2599        Fpscr = fpscr;
2600    '''
2601    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2602                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2603
# Saturating shift right and narrow for smallSignedTypes, registered under
# the same "vqshrun" mnemonic as the snippet above but as class NVqshruns.
#  - imm beyond the source width: result 0; fpscr.qc set for non-zero source.
#  - non-zero imm: if any bit of the shifted BigElement at or above the
#    Element width is set, the result is 0 for a negative source and
#    mask(sizeof(Element) * 8) otherwise, with qc set.
#  - imm == 0: the source is copied without a range check.
2604    vqshrunsCode = '''
2605        FPSCR fpscr = (FPSCR)Fpscr;
2606        if (imm > sizeof(srcElem1) * 8) {
2607            if (srcElem1 != 0)
2608                fpscr.qc = 1;
2609            destElem = 0;
2610        } else if (imm) {
2611            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2612            if (bits(mid, sizeof(BigElement) * 8 - 1,
2613                          sizeof(Element) * 8) != 0) {
2614                if (srcElem1 < 0) {
2615                    destElem = 0;
2616                } else {
2617                    destElem = mask(sizeof(Element) * 8);
2618                }
2619                fpscr.qc = 1;
2620            } else {
2621                destElem = mid;
2622            }
2623        } else {
2624            destElem = srcElem1;
2625        }
2626        Fpscr = fpscr;
2627    '''
2628    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2629                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2630
# VQRSHRN (saturating rounding shift right and narrow), instantiated for
# smallSignedTypes.
#  - imm beyond the source width: result 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - non-zero imm: the source is shifted by (imm - 1), its low bit is kept as
#    the rounding bit, the value is shifted once more, upper bits are filled
#    by OR-ing with the negated bit at (BigElement width - 1 - imm), and the
#    rounding bit is added. A value that does not round-trip through Element
#    saturates to mask(sizeof(Element) * 8 - 1), inverted for a negative
#    source, with qc set.
#  - imm == 0: the same round-trip check and saturation are applied to the
#    unshifted source.
2631    vqrshrnCode = '''
2632        FPSCR fpscr = (FPSCR)Fpscr;
2633        if (imm > sizeof(srcElem1) * 8) {
2634            if (srcElem1 != 0 && srcElem1 != -1)
2635                fpscr.qc = 1;
2636            destElem = 0;
2637        } else if (imm) {
2638            BigElement mid = (srcElem1 >> (imm - 1));
2639            uint64_t rBit = mid & 0x1;
2640            mid >>= 1;
2641            mid |= -(mid & ((BigElement)1 <<
2642                        (sizeof(BigElement) * 8 - 1 - imm)));
2643            mid += rBit;
2644            if (mid != (Element)mid) {
2645                destElem = mask(sizeof(Element) * 8 - 1);
2646                if (srcElem1 < 0)
2647                    destElem = ~destElem;
2648                fpscr.qc = 1;
2649            } else {
2650                destElem = mid;
2651            }
2652        } else {
2653            if (srcElem1 != (Element)srcElem1) {
2654                destElem = mask(sizeof(Element) * 8 - 1);
2655                if (srcElem1 < 0)
2656                    destElem = ~destElem;
2657                fpscr.qc = 1;
2658            } else {
2659                destElem = srcElem1;
2660            }
2661        }
2662        Fpscr = fpscr;
2663    '''
2664    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2665                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2666
2667    vqrshrunCode = '''
2668        FPSCR fpscr = (FPSCR)Fpscr;
2669        if (imm > sizeof(srcElem1) * 8) {
2670            if (srcElem1 != 0)
2671                fpscr.qc = 1;
2672            destElem = 0;
2673        } else if (imm) {
2674            BigElement mid = (srcElem1 >> (imm - 1));
2675            uint64_t rBit = mid & 0x1;
2676            mid >>= 1;
2677            mid += rBit;
2678            if (mid != (Element)mid) {
2679                destElem = mask(sizeof(Element) * 8);
2680                fpscr.qc = 1;
2681            } else {
2682                destElem = mid;
2683            }
2684        } else {
2685            if (srcElem1 != (Element)srcElem1) {
2686                destElem = mask(sizeof(Element) * 8 - 1);
2687                fpscr.qc = 1;
2688            } else {
2689                destElem = srcElem1;
2690            }
2691        }
2692        Fpscr = fpscr;
2693    '''
2694    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2695                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2696
# Saturating rounding shift right and narrow for smallSignedTypes,
# registered under the "vqrshrun" mnemonic as class NVqrshruns.
#  - imm beyond the source width: result 0; fpscr.qc set for non-zero source.
#  - non-zero imm: shift by (imm - 1), keep the low bit as the rounding bit,
#    shift once more, fill upper bits by OR-ing with the negated bit at
#    (BigElement width - 1 - imm), add the rounding bit. If any bit at or
#    above the Element width is set, the result is 0 for a negative source
#    and mask(sizeof(Element) * 8) otherwise, with qc set.
#  - imm == 0: a negative source gives 0 with qc set; otherwise the source is
#    copied.
2697    vqrshrunsCode = '''
2698        FPSCR fpscr = (FPSCR)Fpscr;
2699        if (imm > sizeof(srcElem1) * 8) {
2700            if (srcElem1 != 0)
2701                fpscr.qc = 1;
2702            destElem = 0;
2703        } else if (imm) {
2704            BigElement mid = (srcElem1 >> (imm - 1));
2705            uint64_t rBit = mid & 0x1;
2706            mid >>= 1;
2707            mid |= -(mid & ((BigElement)1 <<
2708                            (sizeof(BigElement) * 8 - 1 - imm)));
2709            mid += rBit;
2710            if (bits(mid, sizeof(BigElement) * 8 - 1,
2711                          sizeof(Element) * 8) != 0) {
2712                if (srcElem1 < 0) {
2713                    destElem = 0;
2714                } else {
2715                    destElem = mask(sizeof(Element) * 8);
2716                }
2717                fpscr.qc = 1;
2718            } else {
2719                destElem = mid;
2720            }
2721        } else {
2722            if (srcElem1 < 0) {
2723                fpscr.qc = 1;
2724                destElem = 0;
2725            } else {
2726                destElem = srcElem1;
2727            }
2728        }
2729        Fpscr = fpscr;
2730    '''
2731    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2732                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2733
# VSHLL (shift left long). The source is widened to BigElement before the
# shift; imm at or beyond the destination element width yields 0.
# Instantiated through twoRegLongShiftInst for smallTypes.
2734    vshllCode = '''
2735        if (imm >= sizeof(destElem) * 8) {
2736            destElem = 0;
2737        } else {
2738            destElem = (BigElement)srcElem1 << imm;
2739        }
2740    '''
2741    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2742
# VMOVL: plain element copy, instantiated through the same long-shift helper
# as vshll (so any widening comes from the helper's element types, which are
# not visible in this section).
2743    vmovlCode = '''
2744        destElem = srcElem1;
2745    '''
2746    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2747
# VCVT, float to fixed point via vfpFpSToFixed(srcElem1, false, false, imm)
# (the "u" in the name suggests the unsigned variant; the meaning of the two
# boolean arguments is defined by vfpFpSToFixed -- confirm there).
# fpscr.idc is set when flushToZero(srcElem1) reports true. The conversion
# is bracketed by prepFpState/finishVfp, and by empty asm statements with
# memory constraints on the operands (presumably to stop the compiler from
# moving the conversion across the FP-state changes).
2748    vcvt2ufxCode = '''
2749        FPSCR fpscr = Fpscr;
2750        if (flushToZero(srcElem1))
2751            fpscr.idc = 1;
2752        VfpSavedState state = prepFpState(VfpRoundNearest);
2753        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2754        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2755        __asm__ __volatile__("" :: "m" (destReg));
2756        finishVfp(fpscr, state, true);
2757        Fpscr = fpscr;
2758    '''
2759    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2760            2, vcvt2ufxCode, toInt = True)
2761    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2762            4, vcvt2ufxCode, toInt = True)
2763
# VCVT, float to fixed point via vfpFpSToFixed(srcElem1, true, false, imm).
# Identical to the vcvt2ufx snippet except that the second argument is true
# (presumably selecting the signed conversion -- confirm in vfpFpSToFixed).
# fpscr.idc is set when flushToZero(srcElem1) reports true; the conversion
# is bracketed by prepFpState/finishVfp and empty asm barriers.
2764    vcvt2sfxCode = '''
2765        FPSCR fpscr = Fpscr;
2766        if (flushToZero(srcElem1))
2767            fpscr.idc = 1;
2768        VfpSavedState state = prepFpState(VfpRoundNearest);
2769        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2770        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2771        __asm__ __volatile__("" :: "m" (destReg));
2772        finishVfp(fpscr, state, true);
2773        Fpscr = fpscr;
2774    '''
2775    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2776            2, vcvt2sfxCode, toInt = True)
2777    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2778            4, vcvt2sfxCode, toInt = True)
2779
# VCVT, fixed point to float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm). The conversion is bracketed by prepFpState/finishVfp and by empty asm
# statements with memory constraints on the operands. Instantiated with
# fromInt = True for both size variants.
2780    vcvtu2fpCode = '''
2781        FPSCR fpscr = Fpscr;
2782        VfpSavedState state = prepFpState(VfpRoundNearest);
2783        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2784        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2785        __asm__ __volatile__("" :: "m" (destElem));
2786        finishVfp(fpscr, state, true);
2787        Fpscr = fpscr;
2788    '''
2789    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2790            2, vcvtu2fpCode, fromInt = True)
2791    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2792            4, vcvtu2fpCode, fromInt = True)
2793
# VCVT, fixed point to float via vfpSFixedToFpS(true, true, srcReg1, false,
# imm); the counterpart of the vcvtu2fp snippet using the "S" conversion
# helper. Bracketed by prepFpState/finishVfp and empty asm barriers, and
# instantiated with fromInt = True.
2794    vcvts2fpCode = '''
2795        FPSCR fpscr = Fpscr;
2796        VfpSavedState state = prepFpState(VfpRoundNearest);
2797        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2799        __asm__ __volatile__("" :: "m" (destElem));
2800        finishVfp(fpscr, state, true);
2801        Fpscr = fpscr;
2802    '''
2803    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2804            2, vcvts2fpCode, fromInt = True)
2805    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2806            4, vcvts2fpCode, fromInt = True)
2807
# VCVT narrowing conversion through vcvtFpSFpH (single to half precision,
# going by the helper name). The source element's bits are reinterpreted as
# a float with bitsToFp; fpscr.idc is set when flushToZero reports true for
# it. fpscr.ahp is forwarded to the conversion helper. Instantiated through
# twoRegNarrowMiscInst with uint16_t as the element type.
2808    vcvts2hCode = '''
2809        FPSCR fpscr = Fpscr;
2810        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2811        if (flushToZero(srcFp1))
2812            fpscr.idc = 1;
2813        VfpSavedState state = prepFpState(VfpRoundNearest);
2814        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2815                                : "m" (srcFp1), "m" (destElem));
2816        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2817                              fpscr.ahp, srcFp1);
2818        __asm__ __volatile__("" :: "m" (destElem));
2819        finishVfp(fpscr, state, true);
2820        Fpscr = fpscr;
2821    '''
2822    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2823
# VCVT widening conversion through vcvtFpHFpS (half to single precision,
# going by the helper name). The float result is turned back into raw bits
# with fpToBits before being stored in destElem; fpscr.ahp is forwarded to
# the helper. Instantiated through twoRegLongMiscInst with uint16_t.
2824    vcvth2sCode = '''
2825        FPSCR fpscr = Fpscr;
2826        VfpSavedState state = prepFpState(VfpRoundNearest);
2827        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2828                                : "m" (srcElem1), "m" (destElem));
2829        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2830        __asm__ __volatile__("" :: "m" (destElem));
2831        finishVfp(fpscr, state, true);
2832        Fpscr = fpscr;
2833    '''
2834    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2835
# Integer VRSQRTE: each uint32_t element is replaced by
# unsignedRSqrtEstimate() of the source element.
2836    vrsqrteCode = '''
2837        destElem = unsignedRSqrtEstimate(srcElem1);
2838    '''
2839    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2840    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2841
# Floating-point VRSQRTE: destReg = fprSqrtEstimate(fpscr, srcReg1).
# fpscr.idc is set first when flushToZero(srcReg1) reports true; the local
# FPSCR copy is written back afterwards.
2842    vrsqrtefpCode = '''
2843        FPSCR fpscr = Fpscr;
2844        if (flushToZero(srcReg1))
2845            fpscr.idc = 1;
2846        destReg = fprSqrtEstimate(fpscr, srcReg1);
2847        Fpscr = fpscr;
2848    '''
2849    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2850    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2851
# Integer VRECPE: each uint32_t element is replaced by
# unsignedRecipEstimate() of the source element.
2852    vrecpeCode = '''
2853        destElem = unsignedRecipEstimate(srcElem1);
2854    '''
2855    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2856    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2857
# Floating-point VRECPE: destReg = fpRecipEstimate(fpscr, srcReg1).
# fpscr.idc is set first when flushToZero(srcReg1) reports true; the local
# FPSCR copy is written back afterwards.
2858    vrecpefpCode = '''
2859        FPSCR fpscr = Fpscr;
2860        if (flushToZero(srcReg1))
2861            fpscr.idc = 1;
2862        destReg = fpRecipEstimate(fpscr, srcReg1);
2863        Fpscr = fpscr;
2864    '''
2865    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2866    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2867
# VREV16 / VREV32 / VREV64. Each snippet copies the element unchanged and
# computes the index j as i XOR (groupSize - 1), where groupSize is the
# number of elements in a group of 2, 4 or 8 bytes respectively
# ((1 << k) / sizeof(Element)). i and j are supplied by the surrounding
# twoRegMiscInst template (presumably source and destination element
# indices -- confirm in the helper).
2868    vrev16Code = '''
2869        destElem = srcElem1;
2870        unsigned groupSize = ((1 << 1) / sizeof(Element));
2871        unsigned reverseMask = (groupSize - 1);
2872        j = i ^ reverseMask;
2873    '''
2874    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2875    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2876    vrev32Code = '''
2877        destElem = srcElem1;
2878        unsigned groupSize = ((1 << 2) / sizeof(Element));
2879        unsigned reverseMask = (groupSize - 1);
2880        j = i ^ reverseMask;
2881    '''
2882    twoRegMiscInst("vrev32", "NVrev32D",
2883            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2884    twoRegMiscInst("vrev32", "NVrev32Q",
2885            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2886    vrev64Code = '''
2887        destElem = srcElem1;
2888        unsigned groupSize = ((1 << 3) / sizeof(Element));
2889        unsigned reverseMask = (groupSize - 1);
2890        j = i ^ reverseMask;
2891    '''
2892    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2893    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2894
# VPADDL: the two source elements handed in by twoRegCondenseInst are each
# widened to BigElement and summed into the destination element.
2895    vpaddlCode = '''
2896        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2897    '''
2898    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2899    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2900
# VPADAL: like vpaddl, but the widened sum of the two source elements is
# added to the existing destination element instead of replacing it.
2901    vpadalCode = '''
2902        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2903    '''
2904    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2905    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2906
# VCLS for signedTypes. The source is first shifted left by one so that the
# sign bit itself is not counted; it is then shifted left repeatedly while
# the current top bit still matches the original sign (negative branch:
# value stays < 0, non-negative branch: value stays >= 0). The count is
# capped at element width - 1.
2907    vclsCode = '''
2908        unsigned count = 0;
2909        if (srcElem1 < 0) {
2910            srcElem1 <<= 1;
2911            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2912                count++;
2913                srcElem1 <<= 1;
2914            }
2915        } else {
2916            srcElem1 <<= 1;
2917            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2918                count++;
2919                srcElem1 <<= 1;
2920            }
2921        }
2922        destElem = count;
2923    '''
2924    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2925    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2926
# VCLZ. Instantiated for signedTypes so that "srcElem1 >= 0" tests whether
# the top bit is clear; the source is shifted left until the top bit is set
# or the count reaches the element width.
2927    vclzCode = '''
2928        unsigned count = 0;
2929        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2930            count++;
2931            srcElem1 <<= 1;
2932        }
2933        destElem = count;
2934    '''
2935    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2936    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2937
# VCNT: counts set bits by adding the low bit and shifting right until the
# source is zero (or element-width iterations have run). Instantiated for
# unsignedTypes.
2938    vcntCode = '''
2939        unsigned count = 0;
2940        while (srcElem1 && count < sizeof(Element) * 8) {
2941            count += srcElem1 & 0x1;
2942            srcElem1 >>= 1;
2943        }
2944        destElem = count;
2945    '''
2946
2947    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2948    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2949
# VMVN: bitwise complement of the source element. Only the uint64_t element
# type is instantiated.
2950    vmvnCode = '''
2951        destElem = ~srcElem1;
2952    '''
2953    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2954    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2955
# VQABS for signedTypes. A source equal to 1 << (width - 1) (only the top
# bit set) cannot be negated: fpscr.qc is set and the result is the bitwise
# complement of the source. Other negative values are negated; non-negative
# values are copied.
2956    vqabsCode = '''
2957        FPSCR fpscr = (FPSCR)Fpscr;
2958        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2959            fpscr.qc = 1;
2960            destElem = ~srcElem1;
2961        } else if (srcElem1 < 0) {
2962            destElem = -srcElem1;
2963        } else {
2964            destElem = srcElem1;
2965        }
2966        Fpscr = fpscr;
2967    '''
2968    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2969    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2970
# VQNEG for signedTypes. A source equal to 1 << (width - 1) sets fpscr.qc
# and yields the bitwise complement of the source; every other value is
# simply negated.
2971    vqnegCode = '''
2972        FPSCR fpscr = (FPSCR)Fpscr;
2973        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2974            fpscr.qc = 1;
2975            destElem = ~srcElem1;
2976        } else {
2977            destElem = -srcElem1;
2978        }
2979        Fpscr = fpscr;
2980    '''
2981    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2982    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2983
# Integer VABS for signedTypes: negative sources are negated, everything
# else is copied. No saturation or flag update is performed here (compare
# with the vqabs snippet).
2984    vabsCode = '''
2985        if (srcElem1 < 0) {
2986            destElem = -srcElem1;
2987        } else {
2988            destElem = srcElem1;
2989        }
2990    '''
2991
2992    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
2993    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# Floating-point VABS: the float is viewed through a uint32_t/float union
# and AND-ed with mask(sizeof(Element) * 8 - 1), which drops the top bit of
# the bit pattern. Fpscr is neither read nor written by this snippet.
2994    vabsfpCode = '''
2995        union
2996        {
2997            uint32_t i;
2998            float f;
2999        } cStruct;
3000        cStruct.f = srcReg1;
3001        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3002        destReg = cStruct.f;
3003    '''
3004    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3005    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3006
# VNEG, integer and floating-point forms. Both snippets are a plain unary
# minus on the source; the integer form is instantiated for signedTypes via
# twoRegMiscInst and the float form via twoRegMiscInstFp. Neither touches
# Fpscr.
3007    vnegCode = '''
3008        destElem = -srcElem1;
3009    '''
3010    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3011    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3012    vnegfpCode = '''
3013        destReg = -srcReg1;
3014    '''
3015    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3016    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3017
# VCGT against zero (two-register misc form).
# Integer: the element becomes mask(sizeof(Element) * 8) when the source is
# greater than 0, else 0.
# Float: binaryOp is called with vcgtFunc and a literal 0.0 as the second
# operand; result 0 -> all-ones destination (-1), otherwise 0; result 2.0
# also sets fpscr.ioc. This rebinds vcgtfpCode, which was used earlier in
# the file for the three-register form.
3018    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3019    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3020    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3021    vcgtfpCode = '''
3022        FPSCR fpscr = (FPSCR)Fpscr;
3023        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3024                             true, true, VfpRoundNearest);
3025        destReg = (res == 0) ? -1 : 0;
3026        if (res == 2.0)
3027            fpscr.ioc = 1;
3028        Fpscr = fpscr;
3029    '''
3030    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3031            2, vcgtfpCode, toInt = True)
3032    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3033            4, vcgtfpCode, toInt = True)
3034
# VCGE against zero (two-register misc form).
# Integer: the element becomes mask(sizeof(Element) * 8) when the source is
# greater than or equal to 0, else 0.
# Float: binaryOp is called with vcgeFunc and a literal 0.0 as the second
# operand; result 0 -> all-ones destination (-1), otherwise 0; result 2.0
# also sets fpscr.ioc. This rebinds vcgefpCode, which was used earlier in
# the file for the three-register form.
3035    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3036    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3037    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3038    vcgefpCode = '''
3039        FPSCR fpscr = (FPSCR)Fpscr;
3040        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3041                             true, true, VfpRoundNearest);
3042        destReg = (res == 0) ? -1 : 0;
3043        if (res == 2.0)
3044            fpscr.ioc = 1;
3045        Fpscr = fpscr;
3046    '''
3047    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3048            2, vcgefpCode, toInt = True)
3049    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3050            4, vcgefpCode, toInt = True)
3051
3052    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3053    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3054    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3055    vceqfpCode = '''
3056        FPSCR fpscr = (FPSCR)Fpscr;
3057        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3058                             true, true, VfpRoundNearest);
3059        destReg = (res == 0) ? -1 : 0;
3060        if (res == 2.0)
3061            fpscr.ioc = 1;
3062        Fpscr = fpscr;
3063    '''
3064    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3065            2, vceqfpCode, toInt = True)
3066    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3067            4, vceqfpCode, toInt = True)
3068
3069    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3070    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3071    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3072    vclefpCode = '''
3073        FPSCR fpscr = (FPSCR)Fpscr;
3074        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3075                             true, true, VfpRoundNearest);
3076        destReg = (res == 0) ? -1 : 0;
3077        if (res == 2.0)
3078            fpscr.ioc = 1;
3079        Fpscr = fpscr;
3080    '''
3081    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3082            2, vclefpCode, toInt = True)
3083    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3084            4, vclefpCode, toInt = True)
3085
3086    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3087    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3088    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3089    vcltfpCode = '''
3090        FPSCR fpscr = (FPSCR)Fpscr;
3091        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3092                             true, true, VfpRoundNearest);
3093        destReg = (res == 0) ? -1 : 0;
3094        if (res == 2.0)
3095            fpscr.ioc = 1;
3096        Fpscr = fpscr;
3097    '''
3098    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3099            2, vcltfpCode, toInt = True)
3100    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3101            4, vcltfpCode, toInt = True)
3102
3103    vswpCode = '''
3104        FloatRegBits mid;
3105        for (unsigned r = 0; r < rCount; r++) {
3106            mid = srcReg1.regs[r];
3107            srcReg1.regs[r] = destReg.regs[r];
3108            destReg.regs[r] = mid;
3109        }
3110    '''
3111    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3112    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3113
3114    vtrnCode = '''
3115        Element mid;
3116        for (unsigned i = 0; i < eCount; i += 2) {
3117            mid = srcReg1.elements[i];
3118            srcReg1.elements[i] = destReg.elements[i + 1];
3119            destReg.elements[i + 1] = mid;
3120        }
3121    '''
3122    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3123    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3124
3125    vuzpCode = '''
3126        Element mid[eCount];
3127        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3128        for (unsigned i = 0; i < eCount / 2; i++) {
3129            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3130            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3131            destReg.elements[i] = destReg.elements[2 * i];
3132        }
3133        for (unsigned i = 0; i < eCount / 2; i++) {
3134            destReg.elements[eCount / 2 + i] = mid[2 * i];
3135        }
3136    '''
3137    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3138    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3139
3140    vzipCode = '''
3141        Element mid[eCount];
3142        memcpy(&mid, &destReg, sizeof(destReg));
3143        for (unsigned i = 0; i < eCount / 2; i++) {
3144            destReg.elements[2 * i] = mid[i];
3145            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3146        }
3147        for (int i = 0; i < eCount / 2; i++) {
3148            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3149            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3150        }
3151    '''
3152    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3153    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3154
3155    vmovnCode = 'destElem = srcElem1;'
3156    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3157
3158    vdupCode = 'destElem = srcElem1;'
3159    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3160    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3161
3162    def vdupGprInst(name, Name, opClass, types, rCount):
3163        global header_output, exec_output
3164        eWalkCode = '''
3165        RegVect destReg;
3166        for (unsigned i = 0; i < eCount; i++) {
3167            destReg.elements[i] = htog((Element)Op1);
3168        }
3169        '''
3170        for reg in range(rCount):
3171            eWalkCode += '''
3172            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3173            ''' % { "reg" : reg }
3174        iop = InstObjParams(name, Name,
3175                            "RegRegOp",
3176                            { "code": eWalkCode,
3177                              "r_count": rCount,
3178                              "predicate_test": predicateTest,
3179                              "op_class": opClass }, [])
3180        header_output += NeonRegRegOpDeclare.subst(iop)
3181        exec_output += NeonEqualRegExecute.subst(iop)
3182        for type in types:
3183            substDict = { "targs" : type,
3184                          "class_name" : Name }
3185            exec_output += NeonExecDeclare.subst(substDict)
3186    vdupGprInst("vdup", "NVdupDGpr", "SimdAluOp", smallUnsignedTypes, 2)
3187    vdupGprInst("vdup", "NVdupQGpr", "SimdAluOp", smallUnsignedTypes, 4)
3188
3189    vmovCode = 'destElem = imm;'
3190    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3191    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3192
3193    vorrCode = 'destElem |= imm;'
3194    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3195    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3196
3197    vmvnCode = 'destElem = ~imm;'
3198    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3199    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3200
3201    vbicCode = 'destElem &= ~imm;'
3202    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3203    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3204
3205    vqmovnCode = '''
3206    FPSCR fpscr = (FPSCR)Fpscr;
3207    destElem = srcElem1;
3208    if ((BigElement)destElem != srcElem1) {
3209        fpscr.qc = 1;
3210        destElem = mask(sizeof(Element) * 8 - 1);
3211        if (srcElem1 < 0)
3212            destElem = ~destElem;
3213    }
3214    Fpscr = fpscr;
3215    '''
3216    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3217
3218    vqmovunCode = '''
3219    FPSCR fpscr = (FPSCR)Fpscr;
3220    destElem = srcElem1;
3221    if ((BigElement)destElem != srcElem1) {
3222        fpscr.qc = 1;
3223        destElem = mask(sizeof(Element) * 8);
3224    }
3225    Fpscr = fpscr;
3226    '''
3227    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3228            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3229
3230    vqmovunsCode = '''
3231    FPSCR fpscr = (FPSCR)Fpscr;
3232    destElem = srcElem1;
3233    if (srcElem1 < 0 ||
3234            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3235        fpscr.qc = 1;
3236        destElem = mask(sizeof(Element) * 8);
3237        if (srcElem1 < 0)
3238            destElem = ~destElem;
3239    }
3240    Fpscr = fpscr;
3241    '''
3242    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3243            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3244
3245    def buildVext(name, Name, opClass, types, rCount, op):
3246        global header_output, exec_output
3247        eWalkCode = '''
3248        RegVect srcReg1, srcReg2, destReg;
3249        '''
3250        for reg in range(rCount):
3251            eWalkCode += simdEnabledCheckCode + '''
3252                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);
3253                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d.uw);
3254            ''' % { "reg" : reg }
3255        eWalkCode += op
3256        for reg in range(rCount):
3257            eWalkCode += '''
3258            FpDestP%(reg)d.uw = gtoh(destReg.regs[%(reg)d]);
3259            ''' % { "reg" : reg }
3260        iop = InstObjParams(name, Name,
3261                            "RegRegRegImmOp",
3262                            { "code": eWalkCode,
3263                              "r_count": rCount,
3264                              "predicate_test": predicateTest,
3265                              "op_class": opClass }, [])
3266        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3267        exec_output += NeonEqualRegExecute.subst(iop)
3268        for type in types:
3269            substDict = { "targs" : type,
3270                          "class_name" : Name }
3271            exec_output += NeonExecDeclare.subst(substDict)
3272
3273    vextCode = '''
3274        for (unsigned i = 0; i < eCount; i++) {
3275            unsigned index = i + imm;
3276            if (index < eCount) {
3277                destReg.elements[i] = srcReg1.elements[index];
3278            } else {
3279                index -= eCount;
3280                assert(index < eCount);
3281                destReg.elements[i] = srcReg2.elements[index];
3282            }
3283        }
3284    '''
3285    buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
3286    buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
3287
3288    def buildVtbxl(name, Name, opClass, length, isVtbl):
3289        global header_output, decoder_output, exec_output
3290        code = '''
3291            union
3292            {
3293                uint8_t bytes[32];
3294                FloatRegBits regs[8];
3295            } table;
3296
3297            union
3298            {
3299                uint8_t bytes[8];
3300                FloatRegBits regs[2];
3301            } destReg, srcReg2;
3302
3303            const unsigned length = %(length)d;
3304            const bool isVtbl = %(isVtbl)s;
3305
3306            srcReg2.regs[0] = htog(FpOp2P0.uw);
3307            srcReg2.regs[1] = htog(FpOp2P1.uw);
3308
3309            destReg.regs[0] = htog(FpDestP0.uw);
3310            destReg.regs[1] = htog(FpDestP1.uw);
3311        ''' % { "length" : length, "isVtbl" : isVtbl }
3312        for reg in range(8):
3313            if reg < length * 2:
3314                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d.uw);\n' % \
3315                        { "reg" : reg }
3316            else:
3317                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3318        code += '''
3319        for (unsigned i = 0; i < sizeof(destReg); i++) {
3320            uint8_t index = srcReg2.bytes[i];
3321            if (index < 8 * length) {
3322                destReg.bytes[i] = table.bytes[index];
3323            } else {
3324                if (isVtbl)
3325                    destReg.bytes[i] = 0;
3326                // else destReg.bytes[i] unchanged
3327            }
3328        }
3329
3330        FpDestP0.uw = gtoh(destReg.regs[0]);
3331        FpDestP1.uw = gtoh(destReg.regs[1]);
3332        '''
3333        iop = InstObjParams(name, Name,
3334                            "RegRegRegOp",
3335                            { "code": code,
3336                              "predicate_test": predicateTest,
3337                              "op_class": opClass }, [])
3338        header_output += RegRegRegOpDeclare.subst(iop)
3339        decoder_output += RegRegRegOpConstructor.subst(iop)
3340        exec_output += PredOpExecute.subst(iop)
3341
3342    buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
3343    buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
3344    buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
3345    buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
3346
3347    buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
3348    buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
3349    buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
3350    buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")
3351}};
3352