neon.isa revision 9517:5ffb5e5c93b4
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (std::isnan(op1) || std::isnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (std::isnan(op1) || std::isnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (std::isnan(op1) || std::isnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (std::isnan(op1) || std::isnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (std::isnan(op1) || std::isnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (std::isnan(op1) || std::isnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        if (imm < 0 && imm >= eCount) {
875            if (FullSystem)
876                fault = new UndefinedInstruction;
877            else
878                fault = new UndefinedInstruction(false, mnemonic);
879        } else {
880            for (unsigned i = 0; i < eCount; i++) {
881                Element srcElem1 = gtoh(srcReg1.elements[i]);
882                Element srcElem2 = gtoh(srcReg2.elements[imm]);
883                Element destElem;
884                %(readDest)s
885                %(op)s
886                destReg.elements[i] = htog(destElem);
887            }
888        }
889        ''' % { "op" : op, "readDest" : readDestCode }
890        for reg in range(rCount):
891            eWalkCode += '''
892            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
893            ''' % { "reg" : reg }
894        iop = InstObjParams(name, Name,
895                            "RegRegRegImmOp",
896                            { "code": eWalkCode,
897                              "r_count": rCount,
898                              "predicate_test": predicateTest,
899                              "op_class": opClass }, [])
900        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
901        exec_output += NeonEqualRegExecute.subst(iop)
902        for type in types:
903            substDict = { "targs" : type,
904                          "class_name" : Name }
905            exec_output += NeonExecDeclare.subst(substDict)
906
907    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
908        global header_output, exec_output
909        rCount = 2
910        eWalkCode = simdEnabledCheckCode + '''
911        RegVect srcReg1, srcReg2;
912        BigRegVect destReg;
913        '''
914        for reg in range(rCount):
915            eWalkCode += '''
916                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
917                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
918            ''' % { "reg" : reg }
919        if readDest:
920            for reg in range(2 * rCount):
921                eWalkCode += '''
922                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
923                ''' % { "reg" : reg }
924        readDestCode = ''
925        if readDest:
926            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
927        eWalkCode += '''
928        if (imm < 0 && imm >= eCount) {
929            if (FullSystem)
930                fault = new UndefinedInstruction;
931            else
932                fault = new UndefinedInstruction(false, mnemonic);
933        } else {
934            for (unsigned i = 0; i < eCount; i++) {
935                Element srcElem1 = gtoh(srcReg1.elements[i]);
936                Element srcElem2 = gtoh(srcReg2.elements[imm]);
937                BigElement destElem;
938                %(readDest)s
939                %(op)s
940                destReg.elements[i] = htog(destElem);
941            }
942        }
943        ''' % { "op" : op, "readDest" : readDestCode }
944        for reg in range(2 * rCount):
945            eWalkCode += '''
946            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
947            ''' % { "reg" : reg }
948        iop = InstObjParams(name, Name,
949                            "RegRegRegImmOp",
950                            { "code": eWalkCode,
951                              "r_count": rCount,
952                              "predicate_test": predicateTest,
953                              "op_class": opClass }, [])
954        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
955        exec_output += NeonUnequalRegExecute.subst(iop)
956        for type in types:
957            substDict = { "targs" : type,
958                          "class_name" : Name }
959            exec_output += NeonExecDeclare.subst(substDict)
960
961    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
962        global header_output, exec_output
963        eWalkCode = simdEnabledCheckCode + '''
964        typedef FloatReg FloatVect[rCount];
965        FloatVect srcRegs1, srcRegs2, destRegs;
966        '''
967        for reg in range(rCount):
968            eWalkCode += '''
969                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
970                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
971            ''' % { "reg" : reg }
972            if readDest:
973                eWalkCode += '''
974                    destRegs[%(reg)d] = FpDestP%(reg)d;
975                ''' % { "reg" : reg }
976        readDestCode = ''
977        if readDest:
978            readDestCode = 'destReg = destRegs[i];'
979        eWalkCode += '''
980        if (imm < 0 && imm >= eCount) {
981            if (FullSystem)
982                fault = new UndefinedInstruction;
983            else
984                fault = new UndefinedInstruction(false, mnemonic);
985        } else {
986            for (unsigned i = 0; i < rCount; i++) {
987                FloatReg srcReg1 = srcRegs1[i];
988                FloatReg srcReg2 = srcRegs2[imm];
989                FloatReg destReg;
990                %(readDest)s
991                %(op)s
992                destRegs[i] = destReg;
993            }
994        }
995        ''' % { "op" : op, "readDest" : readDestCode }
996        for reg in range(rCount):
997            eWalkCode += '''
998            FpDestP%(reg)d = destRegs[%(reg)d];
999            ''' % { "reg" : reg }
1000        iop = InstObjParams(name, Name,
1001                            "FpRegRegRegImmOp",
1002                            { "code": eWalkCode,
1003                              "r_count": rCount,
1004                              "predicate_test": predicateTest,
1005                              "op_class": opClass }, [])
1006        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1007        exec_output += NeonEqualRegExecute.subst(iop)
1008        for type in types:
1009            substDict = { "targs" : type,
1010                          "class_name" : Name }
1011            exec_output += NeonExecDeclare.subst(substDict)
1012
1013    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1014            readDest=False, toInt=False, fromInt=False):
1015        global header_output, exec_output
1016        eWalkCode = simdEnabledCheckCode + '''
1017        RegVect srcRegs1, destRegs;
1018        '''
1019        for reg in range(rCount):
1020            eWalkCode += '''
1021                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1022            ''' % { "reg" : reg }
1023            if readDest:
1024                eWalkCode += '''
1025                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1026                ''' % { "reg" : reg }
1027        readDestCode = ''
1028        if readDest:
1029            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1030            if toInt:
1031                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1032        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1033        if fromInt:
1034            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1035        declDest = 'Element destElem;'
1036        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1037        if toInt:
1038            declDest = 'FloatRegBits destReg;'
1039            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1040        eWalkCode += '''
1041        for (unsigned i = 0; i < eCount; i++) {
1042            %(readOp)s
1043            %(declDest)s
1044            %(readDest)s
1045            %(op)s
1046            %(writeDest)s
1047        }
1048        ''' % { "readOp" : readOpCode,
1049                "declDest" : declDest,
1050                "readDest" : readDestCode,
1051                "op" : op,
1052                "writeDest" : writeDestCode }
1053        for reg in range(rCount):
1054            eWalkCode += '''
1055            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1056            ''' % { "reg" : reg }
1057        iop = InstObjParams(name, Name,
1058                            "RegRegImmOp",
1059                            { "code": eWalkCode,
1060                              "r_count": rCount,
1061                              "predicate_test": predicateTest,
1062                              "op_class": opClass }, [])
1063        header_output += NeonRegRegImmOpDeclare.subst(iop)
1064        exec_output += NeonEqualRegExecute.subst(iop)
1065        for type in types:
1066            substDict = { "targs" : type,
1067                          "class_name" : Name }
1068            exec_output += NeonExecDeclare.subst(substDict)
1069
1070    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1071        global header_output, exec_output
1072        eWalkCode = simdEnabledCheckCode + '''
1073        BigRegVect srcReg1;
1074        RegVect destReg;
1075        '''
1076        for reg in range(4):
1077            eWalkCode += '''
1078                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1079            ''' % { "reg" : reg }
1080        if readDest:
1081            for reg in range(2):
1082                eWalkCode += '''
1083                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1084                ''' % { "reg" : reg }
1085        readDestCode = ''
1086        if readDest:
1087            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1088        eWalkCode += '''
1089        for (unsigned i = 0; i < eCount; i++) {
1090            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1091            Element destElem;
1092            %(readDest)s
1093            %(op)s
1094            destReg.elements[i] = htog(destElem);
1095        }
1096        ''' % { "op" : op, "readDest" : readDestCode }
1097        for reg in range(2):
1098            eWalkCode += '''
1099            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1100            ''' % { "reg" : reg }
1101        iop = InstObjParams(name, Name,
1102                            "RegRegImmOp",
1103                            { "code": eWalkCode,
1104                              "r_count": 2,
1105                              "predicate_test": predicateTest,
1106                              "op_class": opClass }, [])
1107        header_output += NeonRegRegImmOpDeclare.subst(iop)
1108        exec_output += NeonUnequalRegExecute.subst(iop)
1109        for type in types:
1110            substDict = { "targs" : type,
1111                          "class_name" : Name }
1112            exec_output += NeonExecDeclare.subst(substDict)
1113
1114    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1115        global header_output, exec_output
1116        eWalkCode = simdEnabledCheckCode + '''
1117        RegVect srcReg1;
1118        BigRegVect destReg;
1119        '''
1120        for reg in range(2):
1121            eWalkCode += '''
1122                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1123            ''' % { "reg" : reg }
1124        if readDest:
1125            for reg in range(4):
1126                eWalkCode += '''
1127                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1128                ''' % { "reg" : reg }
1129        readDestCode = ''
1130        if readDest:
1131            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1132        eWalkCode += '''
1133        for (unsigned i = 0; i < eCount; i++) {
1134            Element srcElem1 = gtoh(srcReg1.elements[i]);
1135            BigElement destElem;
1136            %(readDest)s
1137            %(op)s
1138            destReg.elements[i] = htog(destElem);
1139        }
1140        ''' % { "op" : op, "readDest" : readDestCode }
1141        for reg in range(4):
1142            eWalkCode += '''
1143            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1144            ''' % { "reg" : reg }
1145        iop = InstObjParams(name, Name,
1146                            "RegRegImmOp",
1147                            { "code": eWalkCode,
1148                              "r_count": 2,
1149                              "predicate_test": predicateTest,
1150                              "op_class": opClass }, [])
1151        header_output += NeonRegRegImmOpDeclare.subst(iop)
1152        exec_output += NeonUnequalRegExecute.subst(iop)
1153        for type in types:
1154            substDict = { "targs" : type,
1155                          "class_name" : Name }
1156            exec_output += NeonExecDeclare.subst(substDict)
1157
1158    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1159        global header_output, exec_output
1160        eWalkCode = simdEnabledCheckCode + '''
1161        RegVect srcReg1, destReg;
1162        '''
1163        for reg in range(rCount):
1164            eWalkCode += '''
1165                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1166            ''' % { "reg" : reg }
1167            if readDest:
1168                eWalkCode += '''
1169                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1170                ''' % { "reg" : reg }
1171        readDestCode = ''
1172        if readDest:
1173            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1174        eWalkCode += '''
1175        for (unsigned i = 0; i < eCount; i++) {
1176            unsigned j = i;
1177            Element srcElem1 = gtoh(srcReg1.elements[i]);
1178            Element destElem;
1179            %(readDest)s
1180            %(op)s
1181            destReg.elements[j] = htog(destElem);
1182        }
1183        ''' % { "op" : op, "readDest" : readDestCode }
1184        for reg in range(rCount):
1185            eWalkCode += '''
1186            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1187            ''' % { "reg" : reg }
1188        iop = InstObjParams(name, Name,
1189                            "RegRegOp",
1190                            { "code": eWalkCode,
1191                              "r_count": rCount,
1192                              "predicate_test": predicateTest,
1193                              "op_class": opClass }, [])
1194        header_output += NeonRegRegOpDeclare.subst(iop)
1195        exec_output += NeonEqualRegExecute.subst(iop)
1196        for type in types:
1197            substDict = { "targs" : type,
1198                          "class_name" : Name }
1199            exec_output += NeonExecDeclare.subst(substDict)
1200
1201    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1202        global header_output, exec_output
1203        eWalkCode = simdEnabledCheckCode + '''
1204        RegVect srcReg1, destReg;
1205        '''
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1209            ''' % { "reg" : reg }
1210            if readDest:
1211                eWalkCode += '''
1212                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1213                ''' % { "reg" : reg }
1214        readDestCode = ''
1215        if readDest:
1216            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1217        eWalkCode += '''
1218        for (unsigned i = 0; i < eCount; i++) {
1219            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1220            Element destElem;
1221            %(readDest)s
1222            %(op)s
1223            destReg.elements[i] = htog(destElem);
1224        }
1225        ''' % { "op" : op, "readDest" : readDestCode }
1226        for reg in range(rCount):
1227            eWalkCode += '''
1228            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1229            ''' % { "reg" : reg }
1230        iop = InstObjParams(name, Name,
1231                            "RegRegImmOp",
1232                            { "code": eWalkCode,
1233                              "r_count": rCount,
1234                              "predicate_test": predicateTest,
1235                              "op_class": opClass }, [])
1236        header_output += NeonRegRegImmOpDeclare.subst(iop)
1237        exec_output += NeonEqualRegExecute.subst(iop)
1238        for type in types:
1239            substDict = { "targs" : type,
1240                          "class_name" : Name }
1241            exec_output += NeonExecDeclare.subst(substDict)
1242
1243    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1244        global header_output, exec_output
1245        eWalkCode = simdEnabledCheckCode + '''
1246        RegVect srcReg1, destReg;
1247        '''
1248        for reg in range(rCount):
1249            eWalkCode += '''
1250                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1251                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1252            ''' % { "reg" : reg }
1253            if readDest:
1254                eWalkCode += '''
1255                ''' % { "reg" : reg }
1256        readDestCode = ''
1257        if readDest:
1258            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1259        eWalkCode += op
1260        for reg in range(rCount):
1261            eWalkCode += '''
1262            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1263            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1264            ''' % { "reg" : reg }
1265        iop = InstObjParams(name, Name,
1266                            "RegRegOp",
1267                            { "code": eWalkCode,
1268                              "r_count": rCount,
1269                              "predicate_test": predicateTest,
1270                              "op_class": opClass }, [])
1271        header_output += NeonRegRegOpDeclare.subst(iop)
1272        exec_output += NeonEqualRegExecute.subst(iop)
1273        for type in types:
1274            substDict = { "targs" : type,
1275                          "class_name" : Name }
1276            exec_output += NeonExecDeclare.subst(substDict)
1277
1278    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1279            readDest=False, toInt=False):
1280        global header_output, exec_output
1281        eWalkCode = simdEnabledCheckCode + '''
1282        typedef FloatReg FloatVect[rCount];
1283        FloatVect srcRegs1;
1284        '''
1285        if toInt:
1286            eWalkCode += 'RegVect destRegs;\n'
1287        else:
1288            eWalkCode += 'FloatVect destRegs;\n'
1289        for reg in range(rCount):
1290            eWalkCode += '''
1291                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1292            ''' % { "reg" : reg }
1293            if readDest:
1294                if toInt:
1295                    eWalkCode += '''
1296                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1297                    ''' % { "reg" : reg }
1298                else:
1299                    eWalkCode += '''
1300                        destRegs[%(reg)d] = FpDestP%(reg)d;
1301                    ''' % { "reg" : reg }
1302        readDestCode = ''
1303        if readDest:
1304            readDestCode = 'destReg = destRegs[i];'
1305        destType = 'FloatReg'
1306        writeDest = 'destRegs[r] = destReg;'
1307        if toInt:
1308            destType = 'FloatRegBits'
1309            writeDest = 'destRegs.regs[r] = destReg;'
1310        eWalkCode += '''
1311        for (unsigned r = 0; r < rCount; r++) {
1312            FloatReg srcReg1 = srcRegs1[r];
1313            %(destType)s destReg;
1314            %(readDest)s
1315            %(op)s
1316            %(writeDest)s
1317        }
1318        ''' % { "op" : op,
1319                "readDest" : readDestCode,
1320                "destType" : destType,
1321                "writeDest" : writeDest }
1322        for reg in range(rCount):
1323            if toInt:
1324                eWalkCode += '''
1325                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1326                ''' % { "reg" : reg }
1327            else:
1328                eWalkCode += '''
1329                FpDestP%(reg)d = destRegs[%(reg)d];
1330                ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "FpRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": rCount,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegOpDeclare.subst(iop)
1338        exec_output += NeonEqualRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1345        global header_output, exec_output
1346        eWalkCode = simdEnabledCheckCode + '''
1347        RegVect srcRegs;
1348        BigRegVect destReg;
1349        '''
1350        for reg in range(rCount):
1351            eWalkCode += '''
1352                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1353            ''' % { "reg" : reg }
1354            if readDest:
1355                eWalkCode += '''
1356                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1357                ''' % { "reg" : reg }
1358        readDestCode = ''
1359        if readDest:
1360            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1361        eWalkCode += '''
1362        for (unsigned i = 0; i < eCount / 2; i++) {
1363            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1364            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1365            BigElement destElem;
1366            %(readDest)s
1367            %(op)s
1368            destReg.elements[i] = htog(destElem);
1369        }
1370        ''' % { "op" : op, "readDest" : readDestCode }
1371        for reg in range(rCount):
1372            eWalkCode += '''
1373            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1374            ''' % { "reg" : reg }
1375        iop = InstObjParams(name, Name,
1376                            "RegRegOp",
1377                            { "code": eWalkCode,
1378                              "r_count": rCount,
1379                              "predicate_test": predicateTest,
1380                              "op_class": opClass }, [])
1381        header_output += NeonRegRegOpDeclare.subst(iop)
1382        exec_output += NeonUnequalRegExecute.subst(iop)
1383        for type in types:
1384            substDict = { "targs" : type,
1385                          "class_name" : Name }
1386            exec_output += NeonExecDeclare.subst(substDict)
1387
1388    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1389        global header_output, exec_output
1390        eWalkCode = simdEnabledCheckCode + '''
1391        BigRegVect srcReg1;
1392        RegVect destReg;
1393        '''
1394        for reg in range(4):
1395            eWalkCode += '''
1396                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1397            ''' % { "reg" : reg }
1398        if readDest:
1399            for reg in range(2):
1400                eWalkCode += '''
1401                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1402                ''' % { "reg" : reg }
1403        readDestCode = ''
1404        if readDest:
1405            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1406        eWalkCode += '''
1407        for (unsigned i = 0; i < eCount; i++) {
1408            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1409            Element destElem;
1410            %(readDest)s
1411            %(op)s
1412            destReg.elements[i] = htog(destElem);
1413        }
1414        ''' % { "op" : op, "readDest" : readDestCode }
1415        for reg in range(2):
1416            eWalkCode += '''
1417            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1418            ''' % { "reg" : reg }
1419        iop = InstObjParams(name, Name,
1420                            "RegRegOp",
1421                            { "code": eWalkCode,
1422                              "r_count": 2,
1423                              "predicate_test": predicateTest,
1424                              "op_class": opClass }, [])
1425        header_output += NeonRegRegOpDeclare.subst(iop)
1426        exec_output += NeonUnequalRegExecute.subst(iop)
1427        for type in types:
1428            substDict = { "targs" : type,
1429                          "class_name" : Name }
1430            exec_output += NeonExecDeclare.subst(substDict)
1431
1432    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1433        global header_output, exec_output
1434        eWalkCode = simdEnabledCheckCode + '''
1435        RegVect destReg;
1436        '''
1437        if readDest:
1438            for reg in range(rCount):
1439                eWalkCode += '''
1440                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1441                ''' % { "reg" : reg }
1442        readDestCode = ''
1443        if readDest:
1444            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1445        eWalkCode += '''
1446        for (unsigned i = 0; i < eCount; i++) {
1447            Element destElem;
1448            %(readDest)s
1449            %(op)s
1450            destReg.elements[i] = htog(destElem);
1451        }
1452        ''' % { "op" : op, "readDest" : readDestCode }
1453        for reg in range(rCount):
1454            eWalkCode += '''
1455            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1456            ''' % { "reg" : reg }
1457        iop = InstObjParams(name, Name,
1458                            "RegImmOp",
1459                            { "code": eWalkCode,
1460                              "r_count": rCount,
1461                              "predicate_test": predicateTest,
1462                              "op_class": opClass }, [])
1463        header_output += NeonRegImmOpDeclare.subst(iop)
1464        exec_output += NeonEqualRegExecute.subst(iop)
1465        for type in types:
1466            substDict = { "targs" : type,
1467                          "class_name" : Name }
1468            exec_output += NeonExecDeclare.subst(substDict)
1469
1470    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1471        global header_output, exec_output
1472        eWalkCode = simdEnabledCheckCode + '''
1473        RegVect srcReg1;
1474        BigRegVect destReg;
1475        '''
1476        for reg in range(2):
1477            eWalkCode += '''
1478                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1479            ''' % { "reg" : reg }
1480        if readDest:
1481            for reg in range(4):
1482                eWalkCode += '''
1483                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1484                ''' % { "reg" : reg }
1485        readDestCode = ''
1486        if readDest:
1487            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1488        eWalkCode += '''
1489        for (unsigned i = 0; i < eCount; i++) {
1490            Element srcElem1 = gtoh(srcReg1.elements[i]);
1491            BigElement destElem;
1492            %(readDest)s
1493            %(op)s
1494            destReg.elements[i] = htog(destElem);
1495        }
1496        ''' % { "op" : op, "readDest" : readDestCode }
1497        for reg in range(4):
1498            eWalkCode += '''
1499            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1500            ''' % { "reg" : reg }
1501        iop = InstObjParams(name, Name,
1502                            "RegRegOp",
1503                            { "code": eWalkCode,
1504                              "r_count": 2,
1505                              "predicate_test": predicateTest,
1506                              "op_class": opClass }, [])
1507        header_output += NeonRegRegOpDeclare.subst(iop)
1508        exec_output += NeonUnequalRegExecute.subst(iop)
1509        for type in types:
1510            substDict = { "targs" : type,
1511                          "class_name" : Name }
1512            exec_output += NeonExecDeclare.subst(substDict)
1513
1514    vhaddCode = '''
1515        Element carryBit =
1516            (((unsigned)srcElem1 & 0x1) +
1517             ((unsigned)srcElem2 & 0x1)) >> 1;
1518        // Use division instead of a shift to ensure the sign extension works
1519        // right. The compiler will figure out if it can be a shift. Mask the
1520        // inputs so they get truncated correctly.
1521        destElem = (((srcElem1 & ~(Element)1) / 2) +
1522                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1523    '''
1524    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1525    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
1527    vrhaddCode = '''
1528        Element carryBit =
1529            (((unsigned)srcElem1 & 0x1) +
1530             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531        // Use division instead of a shift to ensure the sign extension works
1532        // right. The compiler will figure out if it can be a shift. Mask the
1533        // inputs so they get truncated correctly.
1534        destElem = (((srcElem1 & ~(Element)1) / 2) +
1535                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1536    '''
1537    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1538    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# VHSUB (halving subtract): destElem = srcElem1 / 2 - srcElem2 / 2 - barrowBit,
# with the low bit of each input cleared before halving. barrowBit (the
# snippet's spelling of "borrow") is bit 0 of ((low bit of src1) - (low bit of
# src2)) >> 1, i.e. 1 only when src1's low bit is 0 and src2's low bit is 1.
# Registered as VhsubD and VhsubQ over allTypes.
1540    vhsubCode = '''
1541        Element barrowBit =
1542            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543        // Use division instead of a shift to ensure the sign extension works
1544        // right. The compiler will figure out if it can be a shift. Mask the
1545        // inputs so they get truncated correctly.
1546        destElem = (((srcElem1 & ~(Element)1) / 2) -
1547                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1548    '''
1549    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1550    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# Element-wise bitwise logic instructions, all registered over unsignedTypes
# in a D and a Q variant.
# VAND: destElem = srcElem1 & srcElem2.
1552    vandCode = '''
1553        destElem = srcElem1 & srcElem2;
1554    '''
1555    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1556    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1557
# VBIC: AND of srcElem1 with the complement of srcElem2.
1558    vbicCode = '''
1559        destElem = srcElem1 & ~srcElem2;
1560    '''
1561    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1563
# VORR: destElem = srcElem1 | srcElem2.
1564    vorrCode = '''
1565        destElem = srcElem1 | srcElem2;
1566    '''
1567    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1569
# VMOV (register) reuses vorrCode under its own mnemonic, class names and the
# SimdMiscOp op class. NOTE(review): presumably the decoder picks these when
# both source registers are the same, making the OR a plain copy - confirm.
1570    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1571    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1572
# VORN: OR of srcElem1 with the complement of srcElem2.
1573    vornCode = '''
1574        destElem = srcElem1 | ~srcElem2;
1575    '''
1576    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1578
# VEOR: destElem = srcElem1 ^ srcElem2.
1579    veorCode = '''
1580        destElem = srcElem1 ^ srcElem2;
1581    '''
1582    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# Bitwise select instructions. Each snippet reads the previous destElem as an
# input; the trailing True argument is presumably the "read destination" flag
# of threeEqualRegInst - confirm against its definition.
# VBIF: keep destElem bits where srcElem2 is 1, take srcElem1 bits where it is 0.
1585    vbifCode = '''
1586        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1587    '''
1588    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1589    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
# VBIT: take srcElem1 bits where srcElem2 is 1, keep destElem bits where it is 0.
1590    vbitCode = '''
1591        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1592    '''
1593    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
# VBSL: the old destElem is the selector - srcElem1 bits where it is 1,
# srcElem2 bits where it is 0.
1595    vbslCode = '''
1596        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1597    '''
1598    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# VMAX / VMIN (integer): element-wise maximum and minimum chosen with a plain
# comparison; when the inputs are equal srcElem2 is returned in both snippets.
# vmaxCode and vminCode are also reused later for the pairwise vpmax / vpmin.
1601    vmaxCode = '''
1602        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1603    '''
1604    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1605    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1606
1607    vminCode = '''
1608        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1609    '''
1610    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# Integer add family.
# VADD: plain element-wise sum, registered as NVaddD / NVaddQ.
1613    vaddCode = '''
1614        destElem = srcElem1 + srcElem2;
1615    '''
1616    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
# VPADD: the same sum snippet registered with pairwise=True (D form only here).
1619    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620                      2, vaddCode, pairwise=True)
# VADDL / VADDW: both operands are cast to BigElement before adding; one
# snippet is shared by the long and the wide registration helpers.
1621    vaddlwCode = '''
1622        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1623    '''
1624    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1625    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
# VADDHN: add in BigElement width, then shift right by the bit width of
# Element so that only the upper part of the sum is kept.
1626    vaddhnCode = '''
1627        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628                   (sizeof(Element) * 8);
1629    '''
1630    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
# VRADDHN: as above, but 1 << (bit width of Element - 1) is added before the
# shift so the discarded part rounds the result.
1631    vraddhnCode = '''
1632        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1633                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1634                   (sizeof(Element) * 8);
1635    '''
1636    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# VSUB: plain element-wise difference, registered as NVsubD / NVsubQ.
1638    vsubCode = '''
1639        destElem = srcElem1 - srcElem2;
1640    '''
1641    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1642    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
# VSUBL / VSUBW: both operands are cast to BigElement before subtracting; one
# snippet is shared by the long and the wide registration helpers.
1643    vsublwCode = '''
1644        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1645    '''
1646    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1647    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1648
# VQADD (unsigned): saturating add. Wrap-around is detected when the sum is
# smaller than either operand; the result is then forced to (Element)(-1)
# (all ones) and fpscr.qc is set. The FPSCR copy is read from and written back
# to the FpscrQc operand on every execution.
1649    vqaddUCode = '''
1650        destElem = srcElem1 + srcElem2;
1651        FPSCR fpscr = (FPSCR) FpscrQc;
1652        if (destElem < srcElem1 || destElem < srcElem2) {
1653            destElem = (Element)(-1);
1654            fpscr.qc = 1;
1655        }
1656        FpscrQc = fpscr;
1657    '''
1658    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1659    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# VSUBHN: subtract in BigElement width, then shift right by the bit width of
# Element so that only the upper part of the difference is kept.
1660    vsubhnCode = '''
1661        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1662                   (sizeof(Element) * 8);
1663    '''
1664    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
# VRSUBHN: as above, with 1 << (bit width of Element - 1) added before the
# shift to round the result.
1665    vrsubhnCode = '''
1666        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1667                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1668                   (sizeof(Element) * 8);
1669    '''
1670    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# VQADD (signed): saturating add. Overflow is detected from signs: both
# sources share a sign and the sum's sign differs from it. The result is then
# set to the value with only the top bit set (most negative); if the wrapped
# sum was negative - i.e. the true sum was too large - 1 is subtracted, which
# gives the largest positive value. fpscr.qc is set on saturation.
1672    vqaddSCode = '''
1673        destElem = srcElem1 + srcElem2;
1674        FPSCR fpscr = (FPSCR) FpscrQc;
1675        bool negDest = (destElem < 0);
1676        bool negSrc1 = (srcElem1 < 0);
1677        bool negSrc2 = (srcElem2 < 0);
1678        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1679            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1680            if (negDest)
1681                destElem -= 1;
1682            fpscr.qc = 1;
1683        }
1684        FpscrQc = fpscr;
1685    '''
1686    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1687    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1688
# VQSUB (unsigned): saturating subtract. A borrow is detected when the
# difference is larger than srcElem1; the result is then clamped to 0 and
# fpscr.qc is set.
1689    vqsubUCode = '''
1690        destElem = srcElem1 - srcElem2;
1691        FPSCR fpscr = (FPSCR) FpscrQc;
1692        if (destElem > srcElem1) {
1693            destElem = 0;
1694            fpscr.qc = 1;
1695        }
1696        FpscrQc = fpscr;
1697    '''
1698    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1699    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1700
# VQSUB (signed): saturating subtract. Overflow is only possible when the
# sources have opposite signs (negSrc1 == posSrc2) and is detected when the
# difference's sign differs from srcElem1's. The saturated value is built as
# in the signed saturating add: most negative value, minus 1 (largest
# positive) when the wrapped result was negative. fpscr.qc is set on
# saturation.
1701    vqsubSCode = '''
1702        destElem = srcElem1 - srcElem2;
1703        FPSCR fpscr = (FPSCR) FpscrQc;
1704        bool negDest = (destElem < 0);
1705        bool negSrc1 = (srcElem1 < 0);
1706        bool posSrc2 = (srcElem2 >= 0);
1707        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1708            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1709            if (negDest)
1710                destElem -= 1;
1711            fpscr.qc = 1;
1712        }
1713        FpscrQc = fpscr;
1714    '''
1715    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1716    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1717
# Integer compares. Each writes (Element)(-1) (all ones) when the condition
# holds and 0 otherwise.
# VCGT: srcElem1 > srcElem2.
1718    vcgtCode = '''
1719        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1720    '''
1721    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1722    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1723
# VCGE: srcElem1 >= srcElem2.
1724    vcgeCode = '''
1725        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1726    '''
1727    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1728    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1729
# VCEQ: srcElem1 == srcElem2; registered over unsignedTypes only.
1730    vceqCode = '''
1731        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1732    '''
1733    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1734    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1735
# VSHL (register): shift by a per-element signed amount taken from the low
# byte of srcElem2 (cast to int8_t).
#  - Negative amount: shift right by its magnitude. A magnitude of at least
#    the element width yields 0 and clamps shiftAmt to width - 1 so the
#    fix-up mask below is well formed. The fix-up ORs in the upper bits when
#    ltz(srcElem1) holds but the shifted value is not negative (ltz is a
#    helper defined elsewhere, presumably "less than zero" - confirm).
#  - Otherwise: shift left, or 0 when the amount is at least the element
#    width.
1736    vshlCode = '''
1737        int16_t shiftAmt = (int8_t)srcElem2;
1738        if (shiftAmt < 0) {
1739            shiftAmt = -shiftAmt;
1740            if (shiftAmt >= sizeof(Element) * 8) {
1741                shiftAmt = sizeof(Element) * 8 - 1;
1742                destElem = 0;
1743            } else {
1744                destElem = (srcElem1 >> shiftAmt);
1745            }
1746            // Make sure the right shift sign extended when it should.
1747            if (ltz(srcElem1) && !ltz(destElem)) {
1748                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1749                                             1 - shiftAmt));
1750            }
1751        } else {
1752            if (shiftAmt >= sizeof(Element) * 8) {
1753                destElem = 0;
1754            } else {
1755                destElem = srcElem1 << shiftAmt;
1756            }
1757        }
1758    '''
1759    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1760    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1761
# VRSHL (register): rounding variant of the register shift.
#  - Negative amount: right shift as in the plain register shift (including
#    the sign fix-up), then add rBit. rBit is bit (shiftAmt - 1) of srcElem1,
#    i.e. the last bit shifted out, when the magnitude is at most the element
#    width; for larger magnitudes it is 1 only if ltz(srcElem1), which cancels
#    the all-ones value produced by the sign fix-up.
#  - Positive amount: shift left, or 0 when the amount is at least the element
#    width.
#  - Zero: srcElem1 is copied unchanged.
# NOTE(review): registered with op class "SimdAluOp" whereas the plain register
# shift uses "SimdShiftOp" - confirm this is intended.
1762    vrshlCode = '''
1763        int16_t shiftAmt = (int8_t)srcElem2;
1764        if (shiftAmt < 0) {
1765            shiftAmt = -shiftAmt;
1766            Element rBit = 0;
1767            if (shiftAmt <= sizeof(Element) * 8)
1768                rBit = bits(srcElem1, shiftAmt - 1);
1769            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1770                rBit = 1;
1771            if (shiftAmt >= sizeof(Element) * 8) {
1772                shiftAmt = sizeof(Element) * 8 - 1;
1773                destElem = 0;
1774            } else {
1775                destElem = (srcElem1 >> shiftAmt);
1776            }
1777            // Make sure the right shift sign extended when it should.
1778            if (ltz(srcElem1) && !ltz(destElem)) {
1779                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1780                                             1 - shiftAmt));
1781            }
1782            destElem += rBit;
1783        } else if (shiftAmt > 0) {
1784            if (shiftAmt >= sizeof(Element) * 8) {
1785                destElem = 0;
1786            } else {
1787                destElem = srcElem1 << shiftAmt;
1788            }
1789        } else {
1790            destElem = srcElem1;
1791        }
1792    '''
1793    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1794    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1795
# VQSHL (register, unsigned): saturating shift by the signed low byte of
# srcElem2.
#  - Negative amount: right shift by its magnitude (0 when the magnitude is at
#    least the element width); no saturation is possible.
#  - Positive amount: if the amount is at least the element width and
#    srcElem1 != 0, or if any of the top shiftAmt bits of srcElem1 is set, the
#    result saturates to mask(element width) and fpscr.qc is set; otherwise
#    srcElem1 << shiftAmt.
#  - Zero: srcElem1 is copied unchanged.
# NOTE(review): op class is "SimdAluOp" here but "SimdCmpOp" for the signed
# variant and "SimdShiftOp" for the plain shift - confirm which is intended.
1796    vqshlUCode = '''
1797        int16_t shiftAmt = (int8_t)srcElem2;
1798        FPSCR fpscr = (FPSCR) FpscrQc;
1799        if (shiftAmt < 0) {
1800            shiftAmt = -shiftAmt;
1801            if (shiftAmt >= sizeof(Element) * 8) {
1802                shiftAmt = sizeof(Element) * 8 - 1;
1803                destElem = 0;
1804            } else {
1805                destElem = (srcElem1 >> shiftAmt);
1806            }
1807        } else if (shiftAmt > 0) {
1808            if (shiftAmt >= sizeof(Element) * 8) {
1809                if (srcElem1 != 0) {
1810                    destElem = mask(sizeof(Element) * 8);
1811                    fpscr.qc = 1;
1812                } else {
1813                    destElem = 0;
1814                }
1815            } else {
1816                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1817                            sizeof(Element) * 8 - shiftAmt)) {
1818                    destElem = mask(sizeof(Element) * 8);
1819                    fpscr.qc = 1;
1820                } else {
1821                    destElem = srcElem1 << shiftAmt;
1822                }
1823            }
1824        } else {
1825            destElem = srcElem1;
1826        }
1827        FpscrQc = fpscr;
1828    '''
1829    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1830    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1831
# VQSHL (register, signed): saturating shift by the signed low byte of
# srcElem2.
#  - Negative amount: right shift by its magnitude with the same clamp and
#    sign fix-up as the plain register shift; no saturation is possible.
#  - Positive amount: saturation ("sat") is flagged when the amount is at
#    least the element width and srcElem1 != 0, or when the top
#    (shiftAmt + 1) bits of srcElem1 are not all copies of the sign (all ones
#    for a negative source, all zeros otherwise). On saturation fpscr.qc is
#    set and destElem becomes mask(width - 1) (largest positive), complemented
#    to the most negative value when srcElem1 < 0.
#  - Zero: srcElem1 is copied unchanged.
# NOTE(review): op class is "SimdCmpOp" here but "SimdAluOp" for the unsigned
# variant - confirm which is intended.
1832    vqshlSCode = '''
1833        int16_t shiftAmt = (int8_t)srcElem2;
1834        FPSCR fpscr = (FPSCR) FpscrQc;
1835        if (shiftAmt < 0) {
1836            shiftAmt = -shiftAmt;
1837            if (shiftAmt >= sizeof(Element) * 8) {
1838                shiftAmt = sizeof(Element) * 8 - 1;
1839                destElem = 0;
1840            } else {
1841                destElem = (srcElem1 >> shiftAmt);
1842            }
1843            // Make sure the right shift sign extended when it should.
1844            if (srcElem1 < 0 && destElem >= 0) {
1845                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1846                                             1 - shiftAmt));
1847            }
1848        } else if (shiftAmt > 0) {
1849            bool sat = false;
1850            if (shiftAmt >= sizeof(Element) * 8) {
1851                if (srcElem1 != 0)
1852                    sat = true;
1853                else
1854                    destElem = 0;
1855            } else {
1856                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1857                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1858                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1859                    sat = true;
1860                } else {
1861                    destElem = srcElem1 << shiftAmt;
1862                }
1863            }
1864            if (sat) {
1865                fpscr.qc = 1;
1866                destElem = mask(sizeof(Element) * 8 - 1);
1867                if (srcElem1 < 0)
1868                    destElem = ~destElem;
1869            }
1870        } else {
1871            destElem = srcElem1;
1872        }
1873        FpscrQc = fpscr;
1874    '''
1875    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1876    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1877
1878    vqrshlUCode = '''
1879        int16_t shiftAmt = (int8_t)srcElem2;
1880        FPSCR fpscr = (FPSCR) FpscrQc;
1881        if (shiftAmt < 0) {
1882            shiftAmt = -shiftAmt;
1883            Element rBit = 0;
1884            if (shiftAmt <= sizeof(Element) * 8)
1885                rBit = bits(srcElem1, shiftAmt - 1);
1886            if (shiftAmt >= sizeof(Element) * 8) {
1887                shiftAmt = sizeof(Element) * 8 - 1;
1888                destElem = 0;
1889            } else {
1890                destElem = (srcElem1 >> shiftAmt);
1891            }
1892            destElem += rBit;
1893        } else {
1894            if (shiftAmt >= sizeof(Element) * 8) {
1895                if (srcElem1 != 0) {
1896                    destElem = mask(sizeof(Element) * 8);
1897                    fpscr.qc = 1;
1898                } else {
1899                    destElem = 0;
1900                }
1901            } else {
1902                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1903                            sizeof(Element) * 8 - shiftAmt)) {
1904                    destElem = mask(sizeof(Element) * 8);
1905                    fpscr.qc = 1;
1906                } else {
1907                    destElem = srcElem1 << shiftAmt;
1908                }
1909            }
1910        }
1911        FpscrQc = fpscr;
1912    '''
1913    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1914    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# VQRSHL (register, signed): saturating rounding shift by the signed low byte
# of srcElem2.
#  - Negative amount: right shift with clamp and sign fix-up, then add rBit.
#    rBit is bit (shiftAmt - 1) of srcElem1 when the magnitude is at most the
#    element width; for larger magnitudes it is 1 only if srcElem1 < 0, which
#    cancels the all-ones value produced by the sign fix-up.
#  - Positive amount: same saturation test and saturated values as the signed
#    saturating shift (top shiftAmt + 1 bits must all equal the sign);
#    fpscr.qc is set on saturation.
#  - Zero: srcElem1 is copied unchanged.
1916    vqrshlSCode = '''
1917        int16_t shiftAmt = (int8_t)srcElem2;
1918        FPSCR fpscr = (FPSCR) FpscrQc;
1919        if (shiftAmt < 0) {
1920            shiftAmt = -shiftAmt;
1921            Element rBit = 0;
1922            if (shiftAmt <= sizeof(Element) * 8)
1923                rBit = bits(srcElem1, shiftAmt - 1);
1924            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1925                rBit = 1;
1926            if (shiftAmt >= sizeof(Element) * 8) {
1927                shiftAmt = sizeof(Element) * 8 - 1;
1928                destElem = 0;
1929            } else {
1930                destElem = (srcElem1 >> shiftAmt);
1931            }
1932            // Make sure the right shift sign extended when it should.
1933            if (srcElem1 < 0 && destElem >= 0) {
1934                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1935                                             1 - shiftAmt));
1936            }
1937            destElem += rBit;
1938        } else if (shiftAmt > 0) {
1939            bool sat = false;
1940            if (shiftAmt >= sizeof(Element) * 8) {
1941                if (srcElem1 != 0)
1942                    sat = true;
1943                else
1944                    destElem = 0;
1945            } else {
1946                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1947                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1948                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1949                    sat = true;
1950                } else {
1951                    destElem = srcElem1 << shiftAmt;
1952                }
1953            }
1954            if (sat) {
1955                fpscr.qc = 1;
1956                destElem = mask(sizeof(Element) * 8 - 1);
1957                if (srcElem1 < 0)
1958                    destElem = ~destElem;
1959            }
1960        } else {
1961            destElem = srcElem1;
1962        }
1963        FpscrQc = fpscr;
1964    '''
1965    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1966    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1967
# Absolute-difference family. The difference is always formed as
# (larger - smaller) so it is never negative.
# VABA: accumulates the absolute difference into the existing destElem.
1968    vabaCode = '''
1969        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1970                                            (srcElem2 - srcElem1);
1971    '''
1972    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1973    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# VABAL: accumulating form with both operands cast to BigElement first.
1974    vabalCode = '''
1975        destElem += (srcElem1 > srcElem2) ?
1976            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1977            ((BigElement)srcElem2 - (BigElement)srcElem1);
1978    '''
1979    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1980
# VABD: absolute difference written directly to destElem.
1981    vabdCode = '''
1982        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1983                                           (srcElem2 - srcElem1);
1984    '''
1985    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1986    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# VABDL: non-accumulating form with both operands cast to BigElement first.
1987    vabdlCode = '''
1988        destElem = (srcElem1 > srcElem2) ?
1989            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1990            ((BigElement)srcElem2 - (BigElement)srcElem1);
1991    '''
1992    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1993
# VTST: writes (Element)(-1) (all ones) when srcElem1 and srcElem2 have at
# least one set bit in common, 0 otherwise.
1994    vtstCode = '''
1995        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1996    '''
1997    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1998    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1999
# Integer multiply and multiply-accumulate.
# VMUL: element-wise product in Element width.
2000    vmulCode = '''
2001        destElem = srcElem1 * srcElem2;
2002    '''
2003    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2004    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# VMULL: product of the operands after casting both to BigElement.
2005    vmullCode = '''
2006        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2007    '''
2008    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# VMLA: adds the product to the existing destElem.
2010    vmlaCode = '''
2011        destElem = destElem + srcElem1 * srcElem2;
2012    '''
2013    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2014    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# VMLAL: adds the BigElement-width product to the existing destElem.
2015    vmlalCode = '''
2016        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2017    '''
2018    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2019
2020    vqdmlalCode = '''
2021        FPSCR fpscr = (FPSCR) FpscrQc;
2022        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2023        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2024        Element halfNeg = maxNeg / 2;
2025        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2026            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2027            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2028            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2029            fpscr.qc = 1;
2030        }
2031        bool negPreDest = ltz(destElem);
2032        destElem += midElem;
2033        bool negDest = ltz(destElem);
2034        bool negMid = ltz(midElem);
2035        if (negPreDest == negMid && negMid != negDest) {
2036            destElem = mask(sizeof(BigElement) * 8 - 1);
2037            if (negPreDest)
2038                destElem = ~destElem;
2039            fpscr.qc = 1;
2040        }
2041        FpscrQc = fpscr;
2042    '''
2043    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2044
2045    vqdmlslCode = '''
2046        FPSCR fpscr = (FPSCR) FpscrQc;
2047        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2048        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2049        Element halfNeg = maxNeg / 2;
2050        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2051            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2052            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2053            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2054            fpscr.qc = 1;
2055        }
2056        bool negPreDest = ltz(destElem);
2057        destElem -= midElem;
2058        bool negDest = ltz(destElem);
2059        bool posMid = ltz((BigElement)-midElem);
2060        if (negPreDest == posMid && posMid != negDest) {
2061            destElem = mask(sizeof(BigElement) * 8 - 1);
2062            if (negPreDest)
2063                destElem = ~destElem;
2064            fpscr.qc = 1;
2065        }
2066        FpscrQc = fpscr;
2067    '''
2068    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2069
# VQDMULL: saturating doubling multiply long. destElem is 2 * srcElem1 *
# srcElem2; when both sources equal the value with only the top Element bit
# set, destElem is replaced by the complement of that value shifted into the
# upper half of BigElement (largest positive BigElement) and fpscr.qc is set.
# NOTE(review): registered with op class "SimdMultAccOp" although the snippet
# does not read destElem - confirm this is intended.
2070    vqdmullCode = '''
2071        FPSCR fpscr = (FPSCR) FpscrQc;
2072        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2073        if (srcElem1 == srcElem2 &&
2074                srcElem1 == (Element)((Element)1 <<
2075                    (Element)(sizeof(Element) * 8 - 1))) {
2076            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2077            fpscr.qc = 1;
2078        }
2079        FpscrQc = fpscr;
2080    '''
2081    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2082
# VMLS: subtracts the product from the existing destElem.
2083    vmlsCode = '''
2084        destElem = destElem - srcElem1 * srcElem2;
2085    '''
2086    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2087    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# VMLSL: subtracts the BigElement-width product from the existing destElem.
2088    vmlslCode = '''
2089        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2090    '''
2091    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2092
# Polynomial (carry-less) multiply: for every set bit j of srcElem2, srcElem1
# shifted left by j is XORed into the result instead of being added.
# VMUL (polynomial): result kept in Element width.
2093    vmulpCode = '''
2094        destElem = 0;
2095        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2096            if (bits(srcElem2, j))
2097                destElem ^= srcElem1 << j;
2098        }
2099    '''
2100    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2101    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# VMULL (polynomial): srcElem1 is cast to BigElement before shifting, so the
# partial products are formed in BigElement width.
2102    vmullpCode = '''
2103        destElem = 0;
2104        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2105            if (bits(srcElem2, j))
2106                destElem ^= (BigElement)srcElem1 << j;
2107        }
2108    '''
2109    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
# VPMAX / VPMIN (integer): pairwise forms that reuse vmaxCode and vminCode
# with pairwise=True; only D variants are registered here, over smallTypes.
2111    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2112
2113    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2114
# VQDMULH: saturating doubling multiply returning the high half. destElem is
# (2 * srcElem1 * srcElem2) shifted right by the bit width of Element. When
# both sources equal the value with only the top bit set, destElem is replaced
# by ~srcElem1 (all bits set except the top one) and fpscr.qc is set.
2115    vqdmulhCode = '''
2116        FPSCR fpscr = (FPSCR) FpscrQc;
2117        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118                   (sizeof(Element) * 8);
2119        if (srcElem1 == srcElem2 &&
2120                srcElem1 == (Element)((Element)1 <<
2121                    (sizeof(Element) * 8 - 1))) {
2122            destElem = ~srcElem1;
2123            fpscr.qc = 1;
2124        }
2125        FpscrQc = fpscr;
2126    '''
2127    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2128    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2129
2130    vqrdmulhCode = '''
2131        FPSCR fpscr = (FPSCR) FpscrQc;
2132        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2133                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2134                   (sizeof(Element) * 8);
2135        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2136        Element halfNeg = maxNeg / 2;
2137        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2138            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2139            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2140            if (destElem < 0) {
2141                destElem = mask(sizeof(Element) * 8 - 1);
2142            } else {
2143                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2144            }
2145            fpscr.qc = 1;
2146        }
2147        FpscrQc = fpscr;
2148    '''
2149    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2150            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2151    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2152            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2153
# Floating-point VMAX / VMIN and their pairwise forms.
# Both snippets first call processNans(); if it reports done == false the
# result comes from binaryOp() with fpMaxS / fpMinS, otherwise the NaN result
# is kept and fpscr.idc is set when flushToZero(srcReg1, srcReg2) returns
# true. The FPSCR copy is read from and written back to FpscrExc.
# NOTE(review): processNans, binaryOp and flushToZero are defined elsewhere;
# the meaning of their boolean arguments is not visible here.
2154    vmaxfpCode = '''
2155        FPSCR fpscr = (FPSCR) FpscrExc;
2156        bool done;
2157        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2158        if (!done) {
2159            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2160                               true, true, VfpRoundNearest);
2161        } else if (flushToZero(srcReg1, srcReg2)) {
2162            fpscr.idc = 1;
2163        }
2164        FpscrExc = fpscr;
2165    '''
2166    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2167    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2168
2169    vminfpCode = '''
2170        FPSCR fpscr = (FPSCR) FpscrExc;
2171        bool done;
2172        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2173        if (!done) {
2174            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2175                               true, true, VfpRoundNearest);
2176        } else if (flushToZero(srcReg1, srcReg2)) {
2177            fpscr.idc = 1;
2178        }
2179        FpscrExc = fpscr;
2180    '''
2181    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2182    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2183
# Pairwise forms reuse the same snippets with pairwise=True.
2184    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2185                        2, vmaxfpCode, pairwise=True)
2186    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2187                        4, vmaxfpCode, pairwise=True)
2188
2189    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2190                        2, vminfpCode, pairwise=True)
2191    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2192                        4, vminfpCode, pairwise=True)
2193
# Floating-point VADD, VPADD and VSUB. Each snippet delegates to binaryOp()
# with fpAddS / fpSubS and VfpRoundNearest, passing a local FPSCR copy that is
# read from and written back to FpscrExc.
2194    vaddfpCode = '''
2195        FPSCR fpscr = (FPSCR) FpscrExc;
2196        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2197                           true, true, VfpRoundNearest);
2198        FpscrExc = fpscr;
2199    '''
2200    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2201    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2202
# VPADD reuses the add snippet with pairwise=True.
2203    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2204                        2, vaddfpCode, pairwise=True)
2205    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2206                        4, vaddfpCode, pairwise=True)
2207
2208    vsubfpCode = '''
2209        FPSCR fpscr = (FPSCR) FpscrExc;
2210        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2211                           true, true, VfpRoundNearest);
2212        FpscrExc = fpscr;
2213    '''
2214    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2215    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2216
# Floating-point VMUL: delegates to binaryOp() with fpMulS and
# VfpRoundNearest; the FPSCR copy is read from and written back to FpscrExc.
2217    vmulfpCode = '''
2218        FPSCR fpscr = (FPSCR) FpscrExc;
2219        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2220                           true, true, VfpRoundNearest);
2221        FpscrExc = fpscr;
2222    '''
2223    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2224    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2225
# Floating-point VMLA / VMLS. The product is computed by its own binaryOp()
# call into "mid" and then combined with the existing destReg by a second
# binaryOp() call, so the multiply and the add/subtract are two separate
# operations sharing one FPSCR copy.
# VMLA: destReg = mid + destReg.
2226    vmlafpCode = '''
2227        FPSCR fpscr = (FPSCR) FpscrExc;
2228        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2229                             true, true, VfpRoundNearest);
2230        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2231                           true, true, VfpRoundNearest);
2232        FpscrExc = fpscr;
2233    '''
2234    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2235    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2236
# VMLS: destReg = destReg - mid (note the operand order passed to fpSubS).
2237    vmlsfpCode = '''
2238        FPSCR fpscr = (FPSCR) FpscrExc;
2239        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2240                             true, true, VfpRoundNearest);
2241        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2242                           true, true, VfpRoundNearest);
2243        FpscrExc = fpscr;
2244    '''
2245    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2246    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2247
2248    vcgtfpCode = '''
2249        FPSCR fpscr = (FPSCR) FpscrExc;
2250        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2251                             true, true, VfpRoundNearest);
2252        destReg = (res == 0) ? -1 : 0;
2253        if (res == 2.0)
2254            fpscr.ioc = 1;
2255        FpscrExc = fpscr;
2256    '''
2257    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2258            2, vcgtfpCode, toInt = True)
2259    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2260            4, vcgtfpCode, toInt = True)
2261
# C++ template for the register-register floating-point "vcge": same shape
# as the vcgt template but using vcgeFunc.  destReg is -1 when the value
# returned through binaryOp equals 0, else 0; a result of 2.0 sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgeFunc, which is
# not visible here -- confirm.
# The name vcgefpCode is rebound further down for the compare-with-zero form.
2262    vcgefpCode = '''
2263        FPSCR fpscr = (FPSCR) FpscrExc;
2264        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2265                             true, true, VfpRoundNearest);
2266        destReg = (res == 0) ? -1 : 0;
2267        if (res == 2.0)
2268            fpscr.ioc = 1;
2269        FpscrExc = fpscr;
2270    '''
2271    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2272            2, vcgefpCode, toInt = True)
2273    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2274            4, vcgefpCode, toInt = True)
2275
# C++ template for floating-point "vacgt": vacgtFunc is applied to
# srcReg1/srcReg2 through binaryOp.  destReg is -1 when the returned value
# equals 0, else 0; a returned value of 2.0 sets fpscr.ioc.
# NOTE(review): presumably vacgtFunc compares absolute values, as the
# mnemonic suggests -- confirm against its definition.
2276    vacgtfpCode = '''
2277        FPSCR fpscr = (FPSCR) FpscrExc;
2278        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2279                             true, true, VfpRoundNearest);
2280        destReg = (res == 0) ? -1 : 0;
2281        if (res == 2.0)
2282            fpscr.ioc = 1;
2283        FpscrExc = fpscr;
2284    '''
2285    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2286            2, vacgtfpCode, toInt = True)
2287    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2288            4, vacgtfpCode, toInt = True)
2289
# C++ template for floating-point "vacge": vacgeFunc is applied to
# srcReg1/srcReg2 through binaryOp.  destReg is -1 when the returned value
# equals 0, else 0; a returned value of 2.0 sets fpscr.ioc.
# NOTE(review): presumably vacgeFunc compares absolute values, as the
# mnemonic suggests -- confirm against its definition.
2290    vacgefpCode = '''
2291        FPSCR fpscr = (FPSCR) FpscrExc;
2292        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2293                             true, true, VfpRoundNearest);
2294        destReg = (res == 0) ? -1 : 0;
2295        if (res == 2.0)
2296            fpscr.ioc = 1;
2297        FpscrExc = fpscr;
2298    '''
2299    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2300            2, vacgefpCode, toInt = True)
2301    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2302            4, vacgefpCode, toInt = True)
2303
# C++ template for the register-register floating-point "vceq": vceqFunc is
# applied to srcReg1/srcReg2 through binaryOp.  destReg is -1 when the
# returned value equals 0, else 0; a returned value of 2.0 sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 return convention belongs to vceqFunc, which is
# not visible here -- confirm.
# The name vceqfpCode is rebound further down for the compare-with-zero form.
2304    vceqfpCode = '''
2305        FPSCR fpscr = (FPSCR) FpscrExc;
2306        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2307                             true, true, VfpRoundNearest);
2308        destReg = (res == 0) ? -1 : 0;
2309        if (res == 2.0)
2310            fpscr.ioc = 1;
2311        FpscrExc = fpscr;
2312    '''
2313    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2314            2, vceqfpCode, toInt = True)
2315    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2316            4, vceqfpCode, toInt = True)
2317
# C++ template for "vrecps": destReg is the result of fpRecpsS applied to
# srcReg1/srcReg2 through binaryOp; the FPSCR copy taken from FpscrExc is
# written back afterwards.
# NOTE(review): fpRecpsS is defined elsewhere; presumably it implements the
# reciprocal-step computation -- confirm.
2318    vrecpsCode = '''
2319        FPSCR fpscr = (FPSCR) FpscrExc;
2320        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2321                           true, true, VfpRoundNearest);
2322        FpscrExc = fpscr;
2323    '''
2324    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2325    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2326
# C++ template for "vrsqrts": identical structure, with fpRSqrtsS as the
# function passed to binaryOp.
# NOTE(review): presumably the reciprocal-square-root step -- confirm against
# the definition of fpRSqrtsS.
2327    vrsqrtsCode = '''
2328        FPSCR fpscr = (FPSCR) FpscrExc;
2329        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2330                           true, true, VfpRoundNearest);
2331        FpscrExc = fpscr;
2332    '''
2333    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2334    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2335
# C++ template for floating-point "vabd": srcReg1 - srcReg2 is computed via
# binaryOp/fpSubS into `mid`, and destReg receives fabs(mid).  The FPSCR copy
# read from FpscrExc is stored back after the subtraction.
2336    vabdfpCode = '''
2337        FPSCR fpscr = (FPSCR) FpscrExc;
2338        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2339                             true, true, VfpRoundNearest);
2340        destReg = fabs(mid);
2341        FpscrExc = fpscr;
2342    '''
2343    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2344    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2345
# Additional registrations of the multiply / multiply-accumulate templates
# through the twoEqualRegInst, twoEqualRegInstFp and twoRegLongInst helpers.
# They reuse code strings assigned earlier in the file (vmlaCode, vmlafpCode,
# vmlalCode, ...), so no new C++ is defined here.
# NOTE(review): the "s" suffix in the class names (VmlasD, Vmlals, ...)
# presumably marks the by-scalar instruction forms, and the trailing True
# presumably marks the destination as also being read -- confirm against the
# helper definitions.
# NOTE(review): vmla is registered with unsignedTypes while vmls and vmul use
# allTypes; confirm that this difference is intentional.
2346    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2347    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2348    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2349    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2350    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2351
2352    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2353    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2354    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2355    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2356    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2357
2358    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2359    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2360    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2361    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2362    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2363
# Registrations of the saturating doubling multiply family (vqdmull,
# vqdmlal, vqdmlsl, vqdmulh, vqrdmulh) through twoRegLongInst and
# twoEqualRegInst, reusing code strings assigned earlier in the file.
# NOTE(review): as with the other "...s" class names, these are presumably
# the by-scalar forms -- confirm against the helper definitions.
2364    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2365    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2366    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2367    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2368    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2369    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2370            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2371    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2372            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2373
# C++ template for "vshr" (shift right by immediate).  When imm is at least
# the width of srcElem1 in bits the result is -1 if ltz(srcElem1) holds and 0
# otherwise, so no shift by the full width is ever executed; for smaller imm
# the result is srcElem1 >> imm.
2374    vshrCode = '''
2375        if (imm >= sizeof(srcElem1) * 8) {
2376            if (ltz(srcElem1))
2377                destElem = -1;
2378            else
2379                destElem = 0;
2380        } else {
2381            destElem = srcElem1 >> imm;
2382        }
2383    '''
2384    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2385    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
2387    vsraCode = '''
2388        Element mid;;
2389        if (imm >= sizeof(srcElem1) * 8) {
2390            mid = ltz(srcElem1) ? -1 : 0;
2391        } else {
2392            mid = srcElem1 >> imm;
2393            if (ltz(srcElem1) && !ltz(mid)) {
2394                mid |= -(mid & ((Element)1 <<
2395                            (sizeof(Element) * 8 - 1 - imm)));
2396            }
2397        }
2398        destElem += mid;
2399    '''
2400    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2401    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2402
# C++ template for "vrshr" (rounding shift right by immediate).
#   imm > element width : result is 0.
#   imm != 0            : rBit is bit (imm - 1) of the source; the result is
#                         the source shifted right by imm, done as
#                         (>> (imm - 1)) >> 1, plus rBit.
#   imm == 0            : the source is copied unchanged.
# NOTE(review): the two-step shift presumably avoids a single shift by the
# full element width when imm equals that width -- confirm.
2403    vrshrCode = '''
2404        if (imm > sizeof(srcElem1) * 8) {
2405            destElem = 0;
2406        } else if (imm) {
2407            Element rBit = bits(srcElem1, imm - 1);
2408            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2409        } else {
2410            destElem = srcElem1;
2411        }
2412    '''
2413    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2414    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2415
# C++ template for "vrsra" (rounding shift right by immediate and
# accumulate).  Same three cases as the vrshr template, but each result is
# added to destElem instead of replacing it; for imm > element width the
# addend is 0, so destElem keeps its value.
# NOTE(review): the trailing True presumably marks destElem as also being
# read -- confirm against twoRegShiftInst.
2416    vrsraCode = '''
2417        if (imm > sizeof(srcElem1) * 8) {
2418            destElem += 0;
2419        } else if (imm) {
2420            Element rBit = bits(srcElem1, imm - 1);
2421            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2422        } else {
2423            destElem += srcElem1;
2424        }
2425    '''
2426    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2427    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2428
# C++ template for "vsri" (shift right and insert).  For imm >= element
# width destElem is left as it is (self-assignment).  Otherwise the result is
# srcElem1 >> imm OR-ed with the bits of destElem that lie outside
# mask(width - imm), so those destination bits are preserved.
# NOTE(review): the trailing True presumably marks destElem as also being
# read -- confirm against twoRegShiftInst.
2429    vsriCode = '''
2430        if (imm >= sizeof(Element) * 8)
2431            destElem = destElem;
2432        else
2433            destElem = (srcElem1 >> imm) |
2434                (destElem & ~mask(sizeof(Element) * 8 - imm));
2435    '''
2436    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2437    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2438
# C++ template for "vshl" (shift left by immediate).  For imm >= element
# width the source is shifted by (width - 1) and then by 1, i.e. by the full
# width in two steps; otherwise it is shifted left by imm directly.
2439    vshlCode = '''
2440        if (imm >= sizeof(Element) * 8)
2441            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2442        else
2443            destElem = srcElem1 << imm;
2444    '''
2445    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2446    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2447
# C++ template for "vsli" (shift left and insert).  For imm >= element width
# destElem is left as it is (self-assignment).  Otherwise the result is
# srcElem1 << imm OR-ed with destElem & mask(imm), which keeps the
# destination bits selected by mask(imm).
# NOTE(review): the trailing True presumably marks destElem as also being
# read -- confirm against twoRegShiftInst.
2448    vsliCode = '''
2449        if (imm >= sizeof(Element) * 8)
2450            destElem = destElem;
2451        else
2452            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2453    '''
2454    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2455    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2456
# C++ template for "vqshl" by immediate, registered for signedTypes.
# An FPSCR copy is read from FpscrQc and written back at the end; fpscr.qc is
# set whenever the result is replaced by a saturated value.
#   imm >= element width : a non-zero source saturates to the pattern with
#                          only the top bit set, complemented when the source
#                          is positive; a zero source gives 0.
#   imm != 0             : the shifted value is kept unless the source bits
#                          [width-1 : width-1-imm] are neither all zero nor
#                          all ones (mask(imm + 1)); in that case the same
#                          saturated pattern as above is produced.
#   imm == 0             : the source is copied unchanged.
2457    vqshlCode = '''
2458        FPSCR fpscr = (FPSCR) FpscrQc;
2459        if (imm >= sizeof(Element) * 8) {
2460            if (srcElem1 != 0) {
2461                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2462                if (srcElem1 > 0)
2463                    destElem = ~destElem;
2464                fpscr.qc = 1;
2465            } else {
2466                destElem = 0;
2467            }
2468        } else if (imm) {
2469            destElem = (srcElem1 << imm);
2470            uint64_t topBits = bits((uint64_t)srcElem1,
2471                                    sizeof(Element) * 8 - 1,
2472                                    sizeof(Element) * 8 - 1 - imm);
2473            if (topBits != 0 && topBits != mask(imm + 1)) {
2474                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2475                if (srcElem1 > 0)
2476                    destElem = ~destElem;
2477                fpscr.qc = 1;
2478            }
2479        } else {
2480            destElem = srcElem1;
2481        }
2482        FpscrQc = fpscr;
2483    '''
2484    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2485    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2486
# C++ template for the saturating shift left by immediate registered for
# unsignedTypes (classes NVqshluD / NVqshluQ).  fpscr.qc is set whenever the
# result is replaced by the saturated value mask(element width).
#   imm >= element width : a non-zero source saturates; a zero source gives 0.
#   imm != 0             : the shifted value is kept unless any of the source
#                          bits [width-1 : width-imm] is set.
#   imm == 0             : the source is copied unchanged.
2487    vqshluCode = '''
2488        FPSCR fpscr = (FPSCR) FpscrQc;
2489        if (imm >= sizeof(Element) * 8) {
2490            if (srcElem1 != 0) {
2491                destElem = mask(sizeof(Element) * 8);
2492                fpscr.qc = 1;
2493            } else {
2494                destElem = 0;
2495            }
2496        } else if (imm) {
2497            destElem = (srcElem1 << imm);
2498            uint64_t topBits = bits((uint64_t)srcElem1,
2499                                    sizeof(Element) * 8 - 1,
2500                                    sizeof(Element) * 8 - imm);
2501            if (topBits != 0) {
2502                destElem = mask(sizeof(Element) * 8);
2503                fpscr.qc = 1;
2504            }
2505        } else {
2506            destElem = srcElem1;
2507        }
2508        FpscrQc = fpscr;
2509    '''
2510    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2511    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2512
# C++ template for the saturating shift left by immediate registered for
# signedTypes (classes NVqshlusD / NVqshlusQ).  In every branch a negative
# source produces 0 with fpscr.qc set.
#   imm >= element width : a positive source saturates to mask(width) with qc
#                          set; a zero source gives 0.
#   imm != 0             : a non-negative source is shifted; if any source
#                          bit in [width-1 : width-imm] is set the result is
#                          mask(width) with qc set.
#   imm == 0             : a non-negative source is copied unchanged.
# NOTE(review): presumably the signed-input / unsigned-result variant --
# confirm how destElem is typed by twoRegShiftInst.
2513    vqshlusCode = '''
2514        FPSCR fpscr = (FPSCR) FpscrQc;
2515        if (imm >= sizeof(Element) * 8) {
2516            if (srcElem1 < 0) {
2517                destElem = 0;
2518                fpscr.qc = 1;
2519            } else if (srcElem1 > 0) {
2520                destElem = mask(sizeof(Element) * 8);
2521                fpscr.qc = 1;
2522            } else {
2523                destElem = 0;
2524            }
2525        } else if (imm) {
2526            destElem = (srcElem1 << imm);
2527            uint64_t topBits = bits((uint64_t)srcElem1,
2528                                    sizeof(Element) * 8 - 1,
2529                                    sizeof(Element) * 8 - imm);
2530            if (srcElem1 < 0) {
2531                destElem = 0;
2532                fpscr.qc = 1;
2533            } else if (topBits != 0) {
2534                destElem = mask(sizeof(Element) * 8);
2535                fpscr.qc = 1;
2536            }
2537        } else {
2538            if (srcElem1 < 0) {
2539                fpscr.qc = 1;
2540                destElem = 0;
2541            } else {
2542                destElem = srcElem1;
2543            }
2544        }
2545        FpscrQc = fpscr;
2546    '''
2547    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2548    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2549
# C++ template for "vshrn" (shift right by immediate, narrowing form):
# destElem is 0 when imm is at least the width of srcElem1 in bits, and
# srcElem1 >> imm otherwise.
# NOTE(review): the narrowing to the destination element type presumably
# happens in the assignment generated by twoRegNarrowShiftInst -- confirm.
2550    vshrnCode = '''
2551        if (imm >= sizeof(srcElem1) * 8) {
2552            destElem = 0;
2553        } else {
2554            destElem = srcElem1 >> imm;
2555        }
2556    '''
2557    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2558
# C++ template for "vrshrn" (rounding shift right by immediate, narrowing
# form).  imm greater than the width of srcElem1 gives 0; a non-zero imm
# gives the source shifted right by imm (as (>> (imm - 1)) >> 1) plus the
# rounding bit taken from source bit (imm - 1); imm == 0 copies the source.
2559    vrshrnCode = '''
2560        if (imm > sizeof(srcElem1) * 8) {
2561            destElem = 0;
2562        } else if (imm) {
2563            Element rBit = bits(srcElem1, imm - 1);
2564            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2565        } else {
2566            destElem = srcElem1;
2567        }
2568    '''
2569    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2570
# C++ template for "vqshrn" (saturating shift right by immediate, narrowing
# form) registered for smallSignedTypes.
#   imm > width of srcElem1 : result 0; qc is set unless the source is 0 or -1.
#   imm != 0                : `mid` is the source shifted right by imm with
#                             the bits above position (BigElement width - 1 -
#                             imm) filled from that bit; if mid does not
#                             survive truncation to Element, the result is
#                             mask(Element width - 1), complemented for a
#                             negative source, and qc is set.
#   imm == 0                : the source is copied unchanged.
# The FPSCR copy read from FpscrQc is written back at the end.
2571    vqshrnCode = '''
2572        FPSCR fpscr = (FPSCR) FpscrQc;
2573        if (imm > sizeof(srcElem1) * 8) {
2574            if (srcElem1 != 0 && srcElem1 != -1)
2575                fpscr.qc = 1;
2576            destElem = 0;
2577        } else if (imm) {
2578            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2579            mid |= -(mid & ((BigElement)1 <<
2580                        (sizeof(BigElement) * 8 - 1 - imm)));
2581            if (mid != (Element)mid) {
2582                destElem = mask(sizeof(Element) * 8 - 1);
2583                if (srcElem1 < 0)
2584                    destElem = ~destElem;
2585                fpscr.qc = 1;
2586            } else {
2587                destElem = mid;
2588            }
2589        } else {
2590            destElem = srcElem1;
2591        }
2592        FpscrQc = fpscr;
2593    '''
2594    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2595
# C++ template for the saturating narrowing shift right registered for
# smallUnsignedTypes (class NVqshrun).
#   imm > width of srcElem1 : result 0; qc is set for a non-zero source.
#   imm != 0                : `mid` is the source shifted right by imm; if it
#                             does not survive truncation to Element the
#                             result is mask(Element width) and qc is set.
#   imm == 0                : the source is copied unchanged.
# The FPSCR copy read from FpscrQc is written back at the end.
2596    vqshrunCode = '''
2597        FPSCR fpscr = (FPSCR) FpscrQc;
2598        if (imm > sizeof(srcElem1) * 8) {
2599            if (srcElem1 != 0)
2600                fpscr.qc = 1;
2601            destElem = 0;
2602        } else if (imm) {
2603            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2604            if (mid != (Element)mid) {
2605                destElem = mask(sizeof(Element) * 8);
2606                fpscr.qc = 1;
2607            } else {
2608                destElem = mid;
2609            }
2610        } else {
2611            destElem = srcElem1;
2612        }
2613        FpscrQc = fpscr;
2614    '''
2615    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2616                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2617
# C++ template for the saturating narrowing shift right registered for
# smallSignedTypes under the "vqshrun" mnemonic (class NVqshruns).
#   imm > width of srcElem1 : result 0; qc is set for a non-zero source.
#   imm != 0                : `mid` is the source shifted right by imm; if
#                             any bit of mid at or above the Element width is
#                             set, the result is 0 for a negative source and
#                             mask(Element width) otherwise, with qc set.
#   imm == 0                : the source is copied unchanged.
# NOTE(review): presumably the signed-input / unsigned-result variant --
# confirm against twoRegNarrowShiftInst.
2618    vqshrunsCode = '''
2619        FPSCR fpscr = (FPSCR) FpscrQc;
2620        if (imm > sizeof(srcElem1) * 8) {
2621            if (srcElem1 != 0)
2622                fpscr.qc = 1;
2623            destElem = 0;
2624        } else if (imm) {
2625            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2626            if (bits(mid, sizeof(BigElement) * 8 - 1,
2627                          sizeof(Element) * 8) != 0) {
2628                if (srcElem1 < 0) {
2629                    destElem = 0;
2630                } else {
2631                    destElem = mask(sizeof(Element) * 8);
2632                }
2633                fpscr.qc = 1;
2634            } else {
2635                destElem = mid;
2636            }
2637        } else {
2638            destElem = srcElem1;
2639        }
2640        FpscrQc = fpscr;
2641    '''
2642    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2643                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2644
# C++ template for "vqrshrn" (saturating rounding shift right by immediate,
# narrowing form) registered for smallSignedTypes.
#   imm > width of srcElem1 : result 0; qc is set unless the source is 0 or -1.
#   imm != 0                : the source is shifted right by (imm - 1), its
#                             low bit is saved as rBit, one more shift is
#                             applied, the bits above position (BigElement
#                             width - 1 - imm) are filled from that bit, and
#                             rBit is added.  If the value does not survive
#                             truncation to Element the result saturates.
#   imm == 0                : the source is copied if it survives truncation
#                             to Element, otherwise the result saturates.
# Saturation yields mask(Element width - 1), complemented for a negative
# source, and sets qc.  The FPSCR copy is written back to FpscrQc.
2645    vqrshrnCode = '''
2646        FPSCR fpscr = (FPSCR) FpscrQc;
2647        if (imm > sizeof(srcElem1) * 8) {
2648            if (srcElem1 != 0 && srcElem1 != -1)
2649                fpscr.qc = 1;
2650            destElem = 0;
2651        } else if (imm) {
2652            BigElement mid = (srcElem1 >> (imm - 1));
2653            uint64_t rBit = mid & 0x1;
2654            mid >>= 1;
2655            mid |= -(mid & ((BigElement)1 <<
2656                        (sizeof(BigElement) * 8 - 1 - imm)));
2657            mid += rBit;
2658            if (mid != (Element)mid) {
2659                destElem = mask(sizeof(Element) * 8 - 1);
2660                if (srcElem1 < 0)
2661                    destElem = ~destElem;
2662                fpscr.qc = 1;
2663            } else {
2664                destElem = mid;
2665            }
2666        } else {
2667            if (srcElem1 != (Element)srcElem1) {
2668                destElem = mask(sizeof(Element) * 8 - 1);
2669                if (srcElem1 < 0)
2670                    destElem = ~destElem;
2671                fpscr.qc = 1;
2672            } else {
2673                destElem = srcElem1;
2674            }
2675        }
2676        FpscrQc = fpscr;
2677    '''
2678    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2679                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2680
2681    vqrshrunCode = '''
2682        FPSCR fpscr = (FPSCR) FpscrQc;
2683        if (imm > sizeof(srcElem1) * 8) {
2684            if (srcElem1 != 0)
2685                fpscr.qc = 1;
2686            destElem = 0;
2687        } else if (imm) {
2688            BigElement mid = (srcElem1 >> (imm - 1));
2689            uint64_t rBit = mid & 0x1;
2690            mid >>= 1;
2691            mid += rBit;
2692            if (mid != (Element)mid) {
2693                destElem = mask(sizeof(Element) * 8);
2694                fpscr.qc = 1;
2695            } else {
2696                destElem = mid;
2697            }
2698        } else {
2699            if (srcElem1 != (Element)srcElem1) {
2700                destElem = mask(sizeof(Element) * 8 - 1);
2701                fpscr.qc = 1;
2702            } else {
2703                destElem = srcElem1;
2704            }
2705        }
2706        FpscrQc = fpscr;
2707    '''
2708    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2709                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2710
# C++ template for the saturating rounding narrowing shift right registered
# for smallSignedTypes under the "vqrshrun" mnemonic (class NVqrshruns).
#   imm > width of srcElem1 : result 0; qc is set for a non-zero source.
#   imm != 0                : the source is shifted right by (imm - 1), its
#                             low bit is saved as rBit, one more shift is
#                             applied, the bits above position (BigElement
#                             width - 1 - imm) are filled from that bit, and
#                             rBit is added.  If any bit of the value at or
#                             above the Element width is set, the result is 0
#                             for a negative source and mask(Element width)
#                             otherwise, with qc set.
#   imm == 0                : a negative source gives 0 with qc set;
#                             otherwise the source is copied.
# NOTE(review): presumably the signed-input / unsigned-result variant --
# confirm against twoRegNarrowShiftInst.
2711    vqrshrunsCode = '''
2712        FPSCR fpscr = (FPSCR) FpscrQc;
2713        if (imm > sizeof(srcElem1) * 8) {
2714            if (srcElem1 != 0)
2715                fpscr.qc = 1;
2716            destElem = 0;
2717        } else if (imm) {
2718            BigElement mid = (srcElem1 >> (imm - 1));
2719            uint64_t rBit = mid & 0x1;
2720            mid >>= 1;
2721            mid |= -(mid & ((BigElement)1 <<
2722                            (sizeof(BigElement) * 8 - 1 - imm)));
2723            mid += rBit;
2724            if (bits(mid, sizeof(BigElement) * 8 - 1,
2725                          sizeof(Element) * 8) != 0) {
2726                if (srcElem1 < 0) {
2727                    destElem = 0;
2728                } else {
2729                    destElem = mask(sizeof(Element) * 8);
2730                }
2731                fpscr.qc = 1;
2732            } else {
2733                destElem = mid;
2734            }
2735        } else {
2736            if (srcElem1 < 0) {
2737                fpscr.qc = 1;
2738                destElem = 0;
2739            } else {
2740                destElem = srcElem1;
2741            }
2742        }
2743        FpscrQc = fpscr;
2744    '''
2745    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2746                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2747
# C++ template for "vshll" (shift left by immediate, long form): the source
# is cast to BigElement before shifting so the shift is carried out at the
# wider type; imm at or above the width of destElem gives 0.
2748    vshllCode = '''
2749        if (imm >= sizeof(destElem) * 8) {
2750            destElem = 0;
2751        } else {
2752            destElem = (BigElement)srcElem1 << imm;
2753        }
2754    '''
2755    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2756
# C++ template for "vmovl": a plain copy of srcElem1 into destElem,
# registered through the same long-shift helper as vshll.
# NOTE(review): the widening itself presumably comes from the element types
# chosen by twoRegLongShiftInst -- confirm.
2757    vmovlCode = '''
2758        destElem = srcElem1;
2759    '''
2760    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2761
# C++ template for "vcvt" from float to fixed point (classes NVcvt2ufxD/Q).
# fpscr.idc is set when flushToZero(srcElem1) returns true.  The conversion
# vfpFpSToFixed(srcElem1, false, false, imm) runs between prepFpState and
# finishVfp, with empty asm statements carrying memory constraints on the
# operand and on the result placed around it.
# NOTE(review): the asm statements presumably stop the compiler from moving
# the conversion across the FP-state changes, and the first `false` argument
# presumably selects an unsigned result (the sibling vcvt2sfx template passes
# true) -- confirm against vfpFpSToFixed.
2762    vcvt2ufxCode = '''
2763        FPSCR fpscr = (FPSCR) FpscrExc;
2764        if (flushToZero(srcElem1))
2765            fpscr.idc = 1;
2766        VfpSavedState state = prepFpState(VfpRoundNearest);
2767        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2768        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2769        __asm__ __volatile__("" :: "m" (destReg));
2770        finishVfp(fpscr, state, true);
2771        FpscrExc = fpscr;
2772    '''
2773    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2774            2, vcvt2ufxCode, toInt = True)
2775    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2776            4, vcvt2ufxCode, toInt = True)
2777
# C++ template for "vcvt" from float to fixed point (classes NVcvt2sfxD/Q).
# Identical to the vcvt2ufx template except that vfpFpSToFixed is called
# with `true` as its second argument.
# NOTE(review): presumably that flag selects a signed result -- confirm
# against vfpFpSToFixed.
2778    vcvt2sfxCode = '''
2779        FPSCR fpscr = (FPSCR) FpscrExc;
2780        if (flushToZero(srcElem1))
2781            fpscr.idc = 1;
2782        VfpSavedState state = prepFpState(VfpRoundNearest);
2783        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2784        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2785        __asm__ __volatile__("" :: "m" (destReg));
2786        finishVfp(fpscr, state, true);
2787        FpscrExc = fpscr;
2788    '''
2789    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2790            2, vcvt2sfxCode, toInt = True)
2791    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2792            4, vcvt2sfxCode, toInt = True)
2793
# C++ template for "vcvt" from fixed point to float (classes NVcvtu2fpD/Q).
# vfpUFixedToFpS(true, true, srcReg1, false, imm) runs between prepFpState
# and finishVfp, bracketed by empty asm statements with memory constraints on
# the operand and on the result; the FPSCR copy is written back to FpscrExc.
# NOTE(review): the meaning of the boolean arguments is defined by
# vfpUFixedToFpS, which is not visible here -- confirm.
2794    vcvtu2fpCode = '''
2795        FPSCR fpscr = (FPSCR) FpscrExc;
2796        VfpSavedState state = prepFpState(VfpRoundNearest);
2797        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2799        __asm__ __volatile__("" :: "m" (destElem));
2800        finishVfp(fpscr, state, true);
2801        FpscrExc = fpscr;
2802    '''
2803    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2804            2, vcvtu2fpCode, fromInt = True)
2805    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2806            4, vcvtu2fpCode, fromInt = True)
2807
# C++ template for "vcvt" from fixed point to float (classes NVcvts2fpD/Q).
# Same structure as the vcvtu2fp template, but the conversion is performed
# by vfpSFixedToFpS(true, true, srcReg1, false, imm).
# NOTE(review): presumably the signed counterpart of vfpUFixedToFpS --
# confirm against its definition.
2808    vcvts2fpCode = '''
2809        FPSCR fpscr = (FPSCR) FpscrExc;
2810        VfpSavedState state = prepFpState(VfpRoundNearest);
2811        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2812        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2813        __asm__ __volatile__("" :: "m" (destElem));
2814        finishVfp(fpscr, state, true);
2815        FpscrExc = fpscr;
2816    '''
2817    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2818            2, vcvts2fpCode, fromInt = True)
2819    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2820            4, vcvts2fpCode, fromInt = True)
2821
# C++ template for the narrowing "vcvt" registered for uint16_t (class
# NVcvts2h).  The source element is turned into a float with bitsToFp,
# fpscr.idc is set when flushToZero reports true for it, and the result of
# vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, fpscr.ahp, srcFp1) is stored
# in destElem.  The conversion runs between prepFpState and finishVfp,
# bracketed by empty asm statements with memory constraints.
# NOTE(review): presumably a single-precision to half-precision conversion,
# as the helper name suggests -- confirm against vcvtFpSFpH.
2822    vcvts2hCode = '''
2823        FPSCR fpscr = (FPSCR) FpscrExc;
2824        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2825        if (flushToZero(srcFp1))
2826            fpscr.idc = 1;
2827        VfpSavedState state = prepFpState(VfpRoundNearest);
2828        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2829                                : "m" (srcFp1), "m" (destElem));
2830        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2831                              fpscr.ahp, srcFp1);
2832        __asm__ __volatile__("" :: "m" (destElem));
2833        finishVfp(fpscr, state, true);
2834        FpscrExc = fpscr;
2835    '''
2836    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2837
# C++ template for the long "vcvt" registered for uint16_t (class NVcvth2s).
# destElem receives fpToBits of vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1);
# the call runs between prepFpState and finishVfp, bracketed by empty asm
# statements with memory constraints, and the FPSCR copy is written back.
# NOTE(review): presumably a half-precision to single-precision conversion,
# as the helper name suggests -- confirm against vcvtFpHFpS.
2838    vcvth2sCode = '''
2839        FPSCR fpscr = (FPSCR) FpscrExc;
2840        VfpSavedState state = prepFpState(VfpRoundNearest);
2841        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2842                                : "m" (srcElem1), "m" (destElem));
2843        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2844        __asm__ __volatile__("" :: "m" (destElem));
2845        finishVfp(fpscr, state, true);
2846        FpscrExc = fpscr;
2847    '''
2848    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2849
# C++ template for the integer "vrsqrte", registered for uint32_t only:
# destElem is unsignedRSqrtEstimate(srcElem1).
2850    vrsqrteCode = '''
2851        destElem = unsignedRSqrtEstimate(srcElem1);
2852    '''
2853    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2854    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2855
# C++ template for the floating-point "vrsqrte": fpscr.idc is set when
# flushToZero(srcReg1) returns true, destReg is fprSqrtEstimate(fpscr,
# srcReg1), and the FPSCR copy is written back to FpscrExc.
# NOTE(review): flushToZero takes srcReg1 here; whether it also modifies its
# argument depends on its signature, which is not visible -- confirm.
2856    vrsqrtefpCode = '''
2857        FPSCR fpscr = (FPSCR) FpscrExc;
2858        if (flushToZero(srcReg1))
2859            fpscr.idc = 1;
2860        destReg = fprSqrtEstimate(fpscr, srcReg1);
2861        FpscrExc = fpscr;
2862    '''
2863    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2864    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2865
# C++ template for the integer "vrecpe", registered for uint32_t only:
# destElem is unsignedRecipEstimate(srcElem1).
2866    vrecpeCode = '''
2867        destElem = unsignedRecipEstimate(srcElem1);
2868    '''
2869    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2870    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2871
# C++ template for the floating-point "vrecpe": fpscr.idc is set when
# flushToZero(srcReg1) returns true, destReg is fpRecipEstimate(fpscr,
# srcReg1), and the FPSCR copy is written back to FpscrExc.
2872    vrecpefpCode = '''
2873        FPSCR fpscr = (FPSCR) FpscrExc;
2874        if (flushToZero(srcReg1))
2875            fpscr.idc = 1;
2876        destReg = fpRecipEstimate(fpscr, srcReg1);
2877        FpscrExc = fpscr;
2878    '''
2879    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2880    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2881
# C++ templates for "vrev16", "vrev32" and "vrev64".  Each copies srcElem1
# to destElem and sets j = i ^ (groupSize - 1), where groupSize is the group
# size in bytes (1 << 1, 1 << 2 or 1 << 3) divided by sizeof(Element).
# NOTE(review): i and j are not declared in these snippets; presumably they
# are the source and destination element indices supplied by the code that
# twoRegMiscInst generates around the template -- confirm.
2882    vrev16Code = '''
2883        destElem = srcElem1;
2884        unsigned groupSize = ((1 << 1) / sizeof(Element));
2885        unsigned reverseMask = (groupSize - 1);
2886        j = i ^ reverseMask;
2887    '''
2888    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2889    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
2890    vrev32Code = '''
2891        destElem = srcElem1;
2892        unsigned groupSize = ((1 << 2) / sizeof(Element));
2893        unsigned reverseMask = (groupSize - 1);
2894        j = i ^ reverseMask;
2895    '''
2896    twoRegMiscInst("vrev32", "NVrev32D",
2897            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2898    twoRegMiscInst("vrev32", "NVrev32Q",
2899            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
2900    vrev64Code = '''
2901        destElem = srcElem1;
2902        unsigned groupSize = ((1 << 3) / sizeof(Element));
2903        unsigned reverseMask = (groupSize - 1);
2904        j = i ^ reverseMask;
2905    '''
2906    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2907    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2908
# C++ template for "vpaddl": both source elements are cast to BigElement
# before being added, so the sum is formed at the wider type.
# NOTE(review): srcElem1 / srcElem2 are presumably the adjacent pair selected
# by twoRegCondenseInst -- confirm against that helper.
2909    vpaddlCode = '''
2910        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2911    '''
2912    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2913    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2914
# C++ template for "vpadal": the same widened sum, added to the existing
# destElem instead of replacing it.
# NOTE(review): the trailing True presumably marks destElem as also being
# read -- confirm against twoRegCondenseInst.
2915    vpadalCode = '''
2916        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2917    '''
2918    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2919    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2920
# C++ template for "vcls", registered for signedTypes.  The source is
# shifted left once, then shifted further while its sign (negative vs
# non-negative) still matches the sign it had originally; `count` is the
# number of those further shifts, capped at (element width - 1), and becomes
# destElem.  The template modifies its local srcElem1 while counting.
2921    vclsCode = '''
2922        unsigned count = 0;
2923        if (srcElem1 < 0) {
2924            srcElem1 <<= 1;
2925            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2926                count++;
2927                srcElem1 <<= 1;
2928            }
2929        } else {
2930            srcElem1 <<= 1;
2931            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2932                count++;
2933                srcElem1 <<= 1;
2934            }
2935        }
2936        destElem = count;
2937    '''
2938    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2939    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2940
# C++ template for "vclz", registered for signedTypes.  While the source is
# non-negative (top bit clear) it is shifted left and `count` incremented, up
# to the element width in bits; `count` becomes destElem.  The signed element
# type is what makes the `>= 0` test a check of the top bit.
2941    vclzCode = '''
2942        unsigned count = 0;
2943        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2944            count++;
2945            srcElem1 <<= 1;
2946        }
2947        destElem = count;
2948    '''
2949    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2950    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2951
# C++ template for "vcnt", registered for unsignedTypes.  The low bit of the
# source is added to `count` and the source shifted right, until the source
# is zero or the element width in bits has been processed; `count` becomes
# destElem.
2952    vcntCode = '''
2953        unsigned count = 0;
2954        while (srcElem1 && count < sizeof(Element) * 8) {
2955            count += srcElem1 & 0x1;
2956            srcElem1 >>= 1;
2957        }
2958        destElem = count;
2959    '''
2960
2961    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2962    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2963
# C++ template for "vmvn": bitwise complement of the source element.  It is
# registered for uint64_t only, so the operation is applied 64 bits at a time.
2964    vmvnCode = '''
2965        destElem = ~srcElem1;
2966    '''
2967    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2968    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2969
# C++ template for "vqabs", registered for signedTypes.  When the source
# equals the pattern with only the top bit set, fpscr.qc is set and the
# result is the bitwise complement of the source; otherwise the result is
# -srcElem1 for a negative source and srcElem1 for a non-negative one.
# The FPSCR copy read from FpscrQc is written back at the end.
2970    vqabsCode = '''
2971        FPSCR fpscr = (FPSCR) FpscrQc;
2972        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2973            fpscr.qc = 1;
2974            destElem = ~srcElem1;
2975        } else if (srcElem1 < 0) {
2976            destElem = -srcElem1;
2977        } else {
2978            destElem = srcElem1;
2979        }
2980        FpscrQc = fpscr;
2981    '''
2982    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2983    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2984
# C++ template for "vqneg", registered for signedTypes.  When the source
# equals the pattern with only the top bit set, fpscr.qc is set and the
# result is the bitwise complement of the source; every other source is
# negated.  The FPSCR copy read from FpscrQc is written back at the end.
2985    vqnegCode = '''
2986        FPSCR fpscr = (FPSCR) FpscrQc;
2987        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2988            fpscr.qc = 1;
2989            destElem = ~srcElem1;
2990        } else {
2991            destElem = -srcElem1;
2992        }
2993        FpscrQc = fpscr;
2994    '''
2995    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2996    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2997
# C++ template for the integer "vabs", registered for signedTypes: a
# negative source is negated, any other source is copied.  No saturation or
# FPSCR update is performed here (compare the vqabs template).
2998    vabsCode = '''
2999        if (srcElem1 < 0) {
3000            destElem = -srcElem1;
3001        } else {
3002            destElem = srcElem1;
3003        }
3004    '''
3005
3006    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3007    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# C++ template for the floating-point "vabs": srcReg1 is stored into a
# uint32_t/float union, the integer view is AND-ed with
# mask(sizeof(Element) * 8 - 1), and the float view is written to destReg.
# The operation is done on the bit pattern and does not touch the FPSCR.
# NOTE(review): clearing only the top bit relies on Element being 32 bits
# wide here -- confirm how twoRegMiscInstFp defines Element.
3008    vabsfpCode = '''
3009        union
3010        {
3011            uint32_t i;
3012            float f;
3013        } cStruct;
3014        cStruct.f = srcReg1;
3015        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3016        destReg = cStruct.f;
3017    '''
3018    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3019    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3020
# C++ template for the integer "vneg", registered for signedTypes: the
# source element is negated without any saturation (compare the vqneg
# template).
3021    vnegCode = '''
3022        destElem = -srcElem1;
3023    '''
3024    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3025    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# C++ template for the floating-point "vneg": destReg is -srcReg1, computed
# with the C++ unary minus and with no FPSCR access.
3026    vnegfpCode = '''
3027        destReg = -srcReg1;
3028    '''
3029    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3030    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3031
# Integer "vcgt" against zero, registered for signedTypes: destElem is
# mask(element width) when srcElem1 > 0 and 0 otherwise.
3032    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3033    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3034    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
# Floating-point "vcgt" against zero.  This rebinds vcgtfpCode, which was
# used earlier in the file for the register-register form; the only
# difference is that the second binaryOp operand is (FloatReg)0.0.
# destReg is -1 when the value returned through binaryOp equals 0, else 0;
# a returned value of 2.0 sets fpscr.ioc.
# NOTE(review): the 0 / 2.0 return convention belongs to vcgtFunc, which is
# not visible here -- confirm.
3035    vcgtfpCode = '''
3036        FPSCR fpscr = (FPSCR) FpscrExc;
3037        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3038                             true, true, VfpRoundNearest);
3039        destReg = (res == 0) ? -1 : 0;
3040        if (res == 2.0)
3041            fpscr.ioc = 1;
3042        FpscrExc = fpscr;
3043    '''
3044    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3045            2, vcgtfpCode, toInt = True)
3046    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3047            4, vcgtfpCode, toInt = True)
3048
3049    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
        # vcge against zero (integer): destElem becomes
        # mask(sizeof(Element) * 8) when srcElem1 >= 0 and 0 otherwise.
3050    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3051    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3052    vcgefpCode = '''
3053        FPSCR fpscr = (FPSCR) FpscrExc;
3054        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3055                             true, true, VfpRoundNearest);
3056        destReg = (res == 0) ? -1 : 0;
3057        if (res == 2.0)
3058            fpscr.ioc = 1;
3059        FpscrExc = fpscr;
3060    '''
        # vcge against zero (float): same shape as the other float compares
        # in this file, but with vcgeFunc.  A binaryOp result of 0 writes -1
        # to destReg, anything else writes 0, and 2.0 also sets fpscr.ioc.
        # The result-code meaning is defined by vcgeFunc (not visible here).
3061    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3062            2, vcgefpCode, toInt = True)
3063    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3064            4, vcgefpCode, toInt = True)
3065
3066    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
        # vceq against zero (integer): destElem becomes
        # mask(sizeof(Element) * 8) when srcElem1 == 0 and 0 otherwise.
3067    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3068    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3069    vceqfpCode = '''
3070        FPSCR fpscr = (FPSCR) FpscrExc;
3071        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3072                             true, true, VfpRoundNearest);
3073        destReg = (res == 0) ? -1 : 0;
3074        if (res == 2.0)
3075            fpscr.ioc = 1;
3076        FpscrExc = fpscr;
3077    '''
        # vceq against zero (float): runs vceqFunc on srcReg1 and 0.0 via
        # binaryOp.  A result of 0 writes -1 to destReg, anything else writes
        # 0, and 2.0 also sets fpscr.ioc.  The result-code meaning is defined
        # by vceqFunc (not visible here).
3078    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3079            2, vceqfpCode, toInt = True)
3080    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3081            4, vceqfpCode, toInt = True)
3082
3083    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
        # vcle against zero (integer): destElem becomes
        # mask(sizeof(Element) * 8) when srcElem1 <= 0 and 0 otherwise.
3084    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3085    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3086    vclefpCode = '''
3087        FPSCR fpscr = (FPSCR) FpscrExc;
3088        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3089                             true, true, VfpRoundNearest);
3090        destReg = (res == 0) ? -1 : 0;
3091        if (res == 2.0)
3092            fpscr.ioc = 1;
3093        FpscrExc = fpscr;
3094    '''
        # vcle against zero (float): runs vcleFunc on srcReg1 and 0.0 via
        # binaryOp.  A result of 0 writes -1 to destReg, anything else writes
        # 0, and 2.0 also sets fpscr.ioc.  The result-code meaning is defined
        # by vcleFunc (not visible here).
3095    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3096            2, vclefpCode, toInt = True)
3097    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3098            4, vclefpCode, toInt = True)
3099
3100    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
        # vclt against zero (integer): destElem becomes
        # mask(sizeof(Element) * 8) when srcElem1 < 0 and 0 otherwise.
3101    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3102    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3103    vcltfpCode = '''
3104        FPSCR fpscr = (FPSCR) FpscrExc;
3105        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3106                             true, true, VfpRoundNearest);
3107        destReg = (res == 0) ? -1 : 0;
3108        if (res == 2.0)
3109            fpscr.ioc = 1;
3110        FpscrExc = fpscr;
3111    '''
        # vclt against zero (float): runs vcltFunc on srcReg1 and 0.0 via
        # binaryOp.  A result of 0 writes -1 to destReg, anything else writes
        # 0, and 2.0 also sets fpscr.ioc.  The result-code meaning is defined
        # by vcltFunc (not visible here).
3112    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3113            2, vcltfpCode, toInt = True)
3114    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3115            4, vcltfpCode, toInt = True)
3116
3117    vswpCode = '''
3118        FloatRegBits mid;
3119        for (unsigned r = 0; r < rCount; r++) {
3120            mid = srcReg1.regs[r];
3121            srcReg1.regs[r] = destReg.regs[r];
3122            destReg.regs[r] = mid;
3123        }
3124    '''
        # vswp: exchanges srcReg1.regs[r] and destReg.regs[r] for every r
        # below rCount, using `mid` as the temporary.  Both operands are
        # written, which is presumably why it is registered through
        # twoRegMiscScramble rather than twoRegMiscInst (helper not visible
        # here -- confirm).
3125    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3126    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3127
3128    vtrnCode = '''
3129        Element mid;
3130        for (unsigned i = 0; i < eCount; i += 2) {
3131            mid = srcReg1.elements[i];
3132            srcReg1.elements[i] = destReg.elements[i + 1];
3133            destReg.elements[i + 1] = mid;
3134        }
3135    '''
        # vtrn: walks the elements in pairs (i = 0, 2, 4, ...) and swaps the
        # even element of srcReg1 with the following odd element of destReg.
        # destReg's even elements and srcReg1's odd elements are untouched.
3136    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3137            smallUnsignedTypes, 2, vtrnCode)
3138    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3139            smallUnsignedTypes, 4, vtrnCode)
3140
3141    vuzpCode = '''
3142        Element mid[eCount];
3143        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3144        for (unsigned i = 0; i < eCount / 2; i++) {
3145            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3146            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3147            destReg.elements[i] = destReg.elements[2 * i];
3148        }
3149        for (unsigned i = 0; i < eCount / 2; i++) {
3150            destReg.elements[eCount / 2 + i] = mid[2 * i];
3151        }
3152    '''
        # vuzp: de-interleaves the two registers in place.  `mid` is a copy
        # of the original srcReg1, needed because srcReg1 is overwritten in
        # the first loop.  Afterwards:
        #   destReg = even elements of old destReg, then even elements of mid
        #   srcReg1 = odd elements of old destReg, then odd elements of mid
        # The upper half of destReg is filled in a second loop so that the
        # first loop can still read destReg.elements[2 * i] and [2 * i + 1]
        # before they are overwritten.
3153    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3154    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3155
3156    vzipCode = '''
3157        Element mid[eCount];
3158        memcpy(&mid, &destReg, sizeof(destReg));
3159        for (unsigned i = 0; i < eCount / 2; i++) {
3160            destReg.elements[2 * i] = mid[i];
3161            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3162        }
3163        for (int i = 0; i < eCount / 2; i++) {
3164            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3165            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3166        }
3167    '''
        # vzip: interleaves the two registers in place.  `mid` is a copy of
        # the original destReg.  Afterwards:
        #   destReg = mid[0], src[0], mid[1], src[1], ...      (lower halves)
        #   srcReg1 = mid[h], src[h], mid[h + 1], src[h + 1], ... (upper
        #             halves, with h = eCount / 2)
        # The second loop updates srcReg1 in place; it reads index
        # eCount / 2 + i, which no earlier iteration of that loop has written.
        # NOTE(review): the second loop declares `int i` while the first uses
        # `unsigned i`; if eCount is unsigned this is a signed/unsigned
        # comparison.  Consider making both loops use `unsigned`.
3168    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3169    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3170
3171    vmovnCode = 'destElem = srcElem1;'
        # vmovn: a plain element copy.  Any narrowing presumably comes from
        # the element types set up by twoRegNarrowMiscInst, which is defined
        # outside this chunk -- confirm.
3172    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3173
3174    vdupCode = 'destElem = srcElem1;'
        # vdup (from a vector source): the per-element code is a plain copy.
        # How srcElem1 is selected is decided by twoRegMiscScInst, which is
        # defined outside this chunk.
3175    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3176    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3177
3178    def vdupGprInst(name, Name, opClass, types, rCount):
            # Define the vdup form whose source is the Op1 operand rather
            # than a vector register.
            #   name    - mnemonic passed to InstObjParams
            #   Name    - generated class name
            #   opClass - value substituted as "op_class"
            #   types   - element types; one NeonExecDeclare entry is emitted
            #             per type
            #   rCount  - number of FpDestP<n>_uw destination registers
            #             written
            # Appends to the global header_output and exec_output strings.
3179        global header_output, exec_output
3180        eWalkCode = '''
3181        RegVect destReg;
3182        for (unsigned i = 0; i < eCount; i++) {
3183            destReg.elements[i] = htog((Element)Op1);
3184        }
3185        '''
            # Write each filled register word back to its destination operand.
3186        for reg in range(rCount):
3187            eWalkCode += '''
3188            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3189            ''' % { "reg" : reg }
3190        iop = InstObjParams(name, Name,
3191                            "RegRegOp",
3192                            { "code": eWalkCode,
3193                              "r_count": rCount,
3194                              "predicate_test": predicateTest,
3195                              "op_class": opClass }, [])
3196        header_output += NeonRegRegOpDeclare.subst(iop)
3197        exec_output += NeonEqualRegExecute.subst(iop)
            # One NeonExecDeclare substitution per supported element type.
3198        for type in types:
3199            substDict = { "targs" : type,
3200                          "class_name" : Name }
3201            exec_output += NeonExecDeclare.subst(substDict)
3202    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3203    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3204
3205    vmovCode = 'destElem = imm;'
        # vmov (immediate): every destination element is set to imm.
3206    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3207    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3208
3209    vorrCode = 'destElem |= imm;'
        # vorr (immediate): ORs imm into the existing destination element.
        # The trailing True is presumably a "read destination first" flag,
        # since the code reads destElem (oneRegImmInst is defined outside
        # this chunk -- confirm).
3210    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3211    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3212
3213    vmvnCode = 'destElem = ~imm;'
        # vmvn (immediate): every destination element is set to the bitwise
        # complement of imm.
3214    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3215    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3216
3217    vbicCode = 'destElem &= ~imm;'
        # vbic (immediate): clears in the existing destination element every
        # bit that is set in imm.  The trailing True is presumably a "read
        # destination first" flag, since the code reads destElem
        # (oneRegImmInst is defined outside this chunk -- confirm).
3218    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3219    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3220
3221    vqmovnCode = '''
3222    FPSCR fpscr = (FPSCR) FpscrQc;
3223    destElem = srcElem1;
3224    if ((BigElement)destElem != srcElem1) {
3225        fpscr.qc = 1;
3226        destElem = mask(sizeof(Element) * 8 - 1);
3227        if (srcElem1 < 0)
3228            destElem = ~destElem;
3229    }
3230    FpscrQc = fpscr;
3231    '''
        # vqmovn (signed): assigns srcElem1 to destElem and checks whether
        # converting the result back to BigElement reproduces srcElem1.  If
        # not, fpscr.qc is set and destElem is replaced by
        # mask(sizeof(Element) * 8 - 1), complemented when srcElem1 is
        # negative.
3232    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3233
3234    vqmovunCode = '''
3235    FPSCR fpscr = (FPSCR) FpscrQc;
3236    destElem = srcElem1;
3237    if ((BigElement)destElem != srcElem1) {
3238        fpscr.qc = 1;
3239        destElem = mask(sizeof(Element) * 8);
3240    }
3241    FpscrQc = fpscr;
3242    '''
        # Registered with the "vqmovun" mnemonic as class NVqmovun over
        # smallUnsignedTypes.  The snippet assigns srcElem1 to destElem; if
        # converting back to BigElement does not reproduce srcElem1, it sets
        # fpscr.qc and stores mask(sizeof(Element) * 8) instead.
3243    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3244            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3245
3246    vqmovunsCode = '''
3247    FPSCR fpscr = (FPSCR) FpscrQc;
3248    destElem = srcElem1;
3249    if (srcElem1 < 0 ||
3250            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3251        fpscr.qc = 1;
3252        destElem = mask(sizeof(Element) * 8);
3253        if (srcElem1 < 0)
3254            destElem = ~destElem;
3255    }
3256    FpscrQc = fpscr;
3257    '''
        # Registered with the "vqmovun" mnemonic as class NVqmovuns over
        # smallSignedTypes.  The snippet sets fpscr.qc when srcElem1 is
        # negative or when the low sizeof(Element) * 8 bits of the assigned
        # value differ from srcElem1.  In that case destElem becomes
        # mask(sizeof(Element) * 8), complemented for negative inputs.
3258    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3259            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3260
3261    def buildVext(name, Name, opClass, types, rCount, op):
            # Define an instruction that takes two source vectors and an
            # immediate, using the "RegRegRegImmOp" base.
            #   name    - mnemonic passed to InstObjParams
            #   Name    - generated class name
            #   opClass - value substituted as "op_class"
            #   types   - element types; one NeonExecDeclare entry is emitted
            #             per type
            #   rCount  - number of register words loaded from each source
            #             and written to the destination
            #   op      - C++ snippet inserted between the source loads and
            #             the write-back
            # Appends to the global header_output and exec_output strings.
3262        global header_output, exec_output
3263        eWalkCode = '''
3264        RegVect srcReg1, srcReg2, destReg;
3265        '''
            # Load both sources one register word at a time.
            # NOTE(review): simdEnabledCheckCode is concatenated inside the
            # loop, so the generated code contains it rCount times.  Its
            # definition is outside this chunk; if it is a plain check,
            # emitting it once before the loop should be enough -- confirm.
3266        for reg in range(rCount):
3267            eWalkCode += simdEnabledCheckCode + '''
3268                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3269                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3270            ''' % { "reg" : reg }
3271        eWalkCode += op
            # Write the result back to the destination operands.
3272        for reg in range(rCount):
3273            eWalkCode += '''
3274            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3275            ''' % { "reg" : reg }
3276        iop = InstObjParams(name, Name,
3277                            "RegRegRegImmOp",
3278                            { "code": eWalkCode,
3279                              "r_count": rCount,
3280                              "predicate_test": predicateTest,
3281                              "op_class": opClass }, [])
3282        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3283        exec_output += NeonEqualRegExecute.subst(iop)
            # One NeonExecDeclare substitution per supported element type.
3284        for type in types:
3285            substDict = { "targs" : type,
3286                          "class_name" : Name }
3287            exec_output += NeonExecDeclare.subst(substDict)
3288
3289    vextCode = '''
3290        for (unsigned i = 0; i < eCount; i++) {
3291            unsigned index = i + imm;
3292            if (index < eCount) {
3293                destReg.elements[i] = srcReg1.elements[index];
3294            } else {
3295                index -= eCount;
3296                if (index >= eCount) {
3297                    if (FullSystem)
3298                        fault = new UndefinedInstruction;
3299                    else
3300                        fault = new UndefinedInstruction(false, mnemonic);
3301                } else {
3302                    destReg.elements[i] = srcReg2.elements[index];
3303                }
3304            }
3305        }
3306    '''
        # vext: destination element i is taken from position i + imm of the
        # concatenation srcReg1 followed by srcReg2.  An index that falls
        # beyond both sources sets `fault` to an UndefinedInstruction,
        # constructed differently depending on FullSystem, and leaves that
        # destination element unassigned.
        # Registered for uint8_t elements only, in D and Q forms.
3307    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3308    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3309
3310    def buildVtbxl(name, Name, opClass, length, isVtbl):
            # Define one byte-wise table lookup instruction (vtbl or vtbx).
            #   name    - mnemonic passed to InstObjParams
            #   Name    - generated class name
            #   opClass - value substituted as "op_class"
            #   length  - table size; 2 * length register words are loaded
            #             and 8 * length byte indices are valid
            #   isVtbl  - text of a C++ bool literal ("true" / "false")
            #             substituted into the generated code.  When true, an
            #             out-of-range index writes 0; otherwise the
            #             destination byte is left unchanged.
            # Appends to the global header_output, decoder_output and
            # exec_output strings.
3311        global header_output, decoder_output, exec_output
3312        code = '''
3313            union
3314            {
3315                uint8_t bytes[32];
3316                FloatRegBits regs[8];
3317            } table;
3318
3319            union
3320            {
3321                uint8_t bytes[8];
3322                FloatRegBits regs[2];
3323            } destReg, srcReg2;
3324
3325            const unsigned length = %(length)d;
3326            const bool isVtbl = %(isVtbl)s;
3327
3328            srcReg2.regs[0] = htog(FpOp2P0_uw);
3329            srcReg2.regs[1] = htog(FpOp2P1_uw);
3330
3331            destReg.regs[0] = htog(FpDestP0_uw);
3332            destReg.regs[1] = htog(FpDestP1_uw);
3333        ''' % { "length" : length, "isVtbl" : isVtbl }
            # Fill the 8-word table: the first 2 * length words come from the
            # FpOp1P<n>_uw operands, the remainder is zeroed.
3334        for reg in range(8):
3335            if reg < length * 2:
3336                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3337                        { "reg" : reg }
3338            else:
3339                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
            # Per-byte lookup: each byte of srcReg2 is an index into the
            # table.
3340        code += '''
3341        for (unsigned i = 0; i < sizeof(destReg); i++) {
3342            uint8_t index = srcReg2.bytes[i];
3343            if (index < 8 * length) {
3344                destReg.bytes[i] = table.bytes[index];
3345            } else {
3346                if (isVtbl)
3347                    destReg.bytes[i] = 0;
3348                // else destReg.bytes[i] unchanged
3349            }
3350        }
3351
3352        FpDestP0_uw = gtoh(destReg.regs[0]);
3353        FpDestP1_uw = gtoh(destReg.regs[1]);
3354        '''
3355        iop = InstObjParams(name, Name,
3356                            "RegRegRegOp",
3357                            { "code": code,
3358                              "predicate_test": predicateTest,
3359                              "op_class": opClass }, [])
3360        header_output += RegRegRegOpDeclare.subst(iop)
3361        decoder_output += RegRegRegOpConstructor.subst(iop)
3362        exec_output += PredOpExecute.subst(iop)
3363
3364    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
        # vtbl with table lengths 1-4; isVtbl is "true", so out-of-range
        # indices produce 0 in the destination byte.
3365    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3366    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3367    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3368
3369    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
        # vtbx with table lengths 1-4; isVtbl is "false", so out-of-range
        # indices leave the destination byte unchanged.
3370    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3371    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3372    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3373}};
3374