neon.isa revision 9557:8666e81607a6
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (std::isnan(op1) || std::isnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (std::isnan(op1) || std::isnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (std::isnan(op1) || std::isnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (std::isnan(op1) || std::isnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (std::isnan(op1) || std::isnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (std::isnan(op1) || std::isnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        if (imm < 0 && imm >= eCount) {
875            if (FullSystem)
876                fault = new UndefinedInstruction;
877            else
878                fault = new UndefinedInstruction(false, mnemonic);
879        } else {
880            for (unsigned i = 0; i < eCount; i++) {
881                Element srcElem1 = gtoh(srcReg1.elements[i]);
882                Element srcElem2 = gtoh(srcReg2.elements[imm]);
883                Element destElem;
884                %(readDest)s
885                %(op)s
886                destReg.elements[i] = htog(destElem);
887            }
888        }
889        ''' % { "op" : op, "readDest" : readDestCode }
890        for reg in range(rCount):
891            eWalkCode += '''
892            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
893            ''' % { "reg" : reg }
894        iop = InstObjParams(name, Name,
895                            "RegRegRegImmOp",
896                            { "code": eWalkCode,
897                              "r_count": rCount,
898                              "predicate_test": predicateTest,
899                              "op_class": opClass }, [])
900        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
901        exec_output += NeonEqualRegExecute.subst(iop)
902        for type in types:
903            substDict = { "targs" : type,
904                          "class_name" : Name }
905            exec_output += NeonExecDeclare.subst(substDict)
906
907    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
908        global header_output, exec_output
909        rCount = 2
910        eWalkCode = simdEnabledCheckCode + '''
911        RegVect srcReg1, srcReg2;
912        BigRegVect destReg;
913        '''
914        for reg in range(rCount):
915            eWalkCode += '''
916                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
917                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
918            ''' % { "reg" : reg }
919        if readDest:
920            for reg in range(2 * rCount):
921                eWalkCode += '''
922                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
923                ''' % { "reg" : reg }
924        readDestCode = ''
925        if readDest:
926            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
927        eWalkCode += '''
928        if (imm < 0 && imm >= eCount) {
929            if (FullSystem)
930                fault = new UndefinedInstruction;
931            else
932                fault = new UndefinedInstruction(false, mnemonic);
933        } else {
934            for (unsigned i = 0; i < eCount; i++) {
935                Element srcElem1 = gtoh(srcReg1.elements[i]);
936                Element srcElem2 = gtoh(srcReg2.elements[imm]);
937                BigElement destElem;
938                %(readDest)s
939                %(op)s
940                destReg.elements[i] = htog(destElem);
941            }
942        }
943        ''' % { "op" : op, "readDest" : readDestCode }
944        for reg in range(2 * rCount):
945            eWalkCode += '''
946            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
947            ''' % { "reg" : reg }
948        iop = InstObjParams(name, Name,
949                            "RegRegRegImmOp",
950                            { "code": eWalkCode,
951                              "r_count": rCount,
952                              "predicate_test": predicateTest,
953                              "op_class": opClass }, [])
954        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
955        exec_output += NeonUnequalRegExecute.subst(iop)
956        for type in types:
957            substDict = { "targs" : type,
958                          "class_name" : Name }
959            exec_output += NeonExecDeclare.subst(substDict)
960
961    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
962        global header_output, exec_output
963        eWalkCode = simdEnabledCheckCode + '''
964        typedef FloatReg FloatVect[rCount];
965        FloatVect srcRegs1, srcRegs2, destRegs;
966        '''
967        for reg in range(rCount):
968            eWalkCode += '''
969                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
970                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
971            ''' % { "reg" : reg }
972            if readDest:
973                eWalkCode += '''
974                    destRegs[%(reg)d] = FpDestP%(reg)d;
975                ''' % { "reg" : reg }
976        readDestCode = ''
977        if readDest:
978            readDestCode = 'destReg = destRegs[i];'
979        eWalkCode += '''
980        if (imm < 0 && imm >= eCount) {
981            if (FullSystem)
982                fault = new UndefinedInstruction;
983            else
984                fault = new UndefinedInstruction(false, mnemonic);
985        } else {
986            for (unsigned i = 0; i < rCount; i++) {
987                FloatReg srcReg1 = srcRegs1[i];
988                FloatReg srcReg2 = srcRegs2[imm];
989                FloatReg destReg;
990                %(readDest)s
991                %(op)s
992                destRegs[i] = destReg;
993            }
994        }
995        ''' % { "op" : op, "readDest" : readDestCode }
996        for reg in range(rCount):
997            eWalkCode += '''
998            FpDestP%(reg)d = destRegs[%(reg)d];
999            ''' % { "reg" : reg }
1000        iop = InstObjParams(name, Name,
1001                            "FpRegRegRegImmOp",
1002                            { "code": eWalkCode,
1003                              "r_count": rCount,
1004                              "predicate_test": predicateTest,
1005                              "op_class": opClass }, [])
1006        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1007        exec_output += NeonEqualRegExecute.subst(iop)
1008        for type in types:
1009            substDict = { "targs" : type,
1010                          "class_name" : Name }
1011            exec_output += NeonExecDeclare.subst(substDict)
1012
1013    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1014            readDest=False, toInt=False, fromInt=False):
1015        global header_output, exec_output
1016        eWalkCode = simdEnabledCheckCode + '''
1017        RegVect srcRegs1, destRegs;
1018        '''
1019        for reg in range(rCount):
1020            eWalkCode += '''
1021                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1022            ''' % { "reg" : reg }
1023            if readDest:
1024                eWalkCode += '''
1025                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1026                ''' % { "reg" : reg }
1027        readDestCode = ''
1028        if readDest:
1029            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1030            if toInt:
1031                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1032        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1033        if fromInt:
1034            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1035        declDest = 'Element destElem;'
1036        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1037        if toInt:
1038            declDest = 'FloatRegBits destReg;'
1039            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1040        eWalkCode += '''
1041        for (unsigned i = 0; i < eCount; i++) {
1042            %(readOp)s
1043            %(declDest)s
1044            %(readDest)s
1045            %(op)s
1046            %(writeDest)s
1047        }
1048        ''' % { "readOp" : readOpCode,
1049                "declDest" : declDest,
1050                "readDest" : readDestCode,
1051                "op" : op,
1052                "writeDest" : writeDestCode }
1053        for reg in range(rCount):
1054            eWalkCode += '''
1055            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1056            ''' % { "reg" : reg }
1057        iop = InstObjParams(name, Name,
1058                            "RegRegImmOp",
1059                            { "code": eWalkCode,
1060                              "r_count": rCount,
1061                              "predicate_test": predicateTest,
1062                              "op_class": opClass }, [])
1063        header_output += NeonRegRegImmOpDeclare.subst(iop)
1064        exec_output += NeonEqualRegExecute.subst(iop)
1065        for type in types:
1066            substDict = { "targs" : type,
1067                          "class_name" : Name }
1068            exec_output += NeonExecDeclare.subst(substDict)
1069
1070    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1071        global header_output, exec_output
1072        eWalkCode = simdEnabledCheckCode + '''
1073        BigRegVect srcReg1;
1074        RegVect destReg;
1075        '''
1076        for reg in range(4):
1077            eWalkCode += '''
1078                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1079            ''' % { "reg" : reg }
1080        if readDest:
1081            for reg in range(2):
1082                eWalkCode += '''
1083                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1084                ''' % { "reg" : reg }
1085        readDestCode = ''
1086        if readDest:
1087            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1088        eWalkCode += '''
1089        for (unsigned i = 0; i < eCount; i++) {
1090            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1091            Element destElem;
1092            %(readDest)s
1093            %(op)s
1094            destReg.elements[i] = htog(destElem);
1095        }
1096        ''' % { "op" : op, "readDest" : readDestCode }
1097        for reg in range(2):
1098            eWalkCode += '''
1099            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1100            ''' % { "reg" : reg }
1101        iop = InstObjParams(name, Name,
1102                            "RegRegImmOp",
1103                            { "code": eWalkCode,
1104                              "r_count": 2,
1105                              "predicate_test": predicateTest,
1106                              "op_class": opClass }, [])
1107        header_output += NeonRegRegImmOpDeclare.subst(iop)
1108        exec_output += NeonUnequalRegExecute.subst(iop)
1109        for type in types:
1110            substDict = { "targs" : type,
1111                          "class_name" : Name }
1112            exec_output += NeonExecDeclare.subst(substDict)
1113
1114    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1115        global header_output, exec_output
1116        eWalkCode = simdEnabledCheckCode + '''
1117        RegVect srcReg1;
1118        BigRegVect destReg;
1119        '''
1120        for reg in range(2):
1121            eWalkCode += '''
1122                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1123            ''' % { "reg" : reg }
1124        if readDest:
1125            for reg in range(4):
1126                eWalkCode += '''
1127                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1128                ''' % { "reg" : reg }
1129        readDestCode = ''
1130        if readDest:
1131            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1132        eWalkCode += '''
1133        for (unsigned i = 0; i < eCount; i++) {
1134            Element srcElem1 = gtoh(srcReg1.elements[i]);
1135            BigElement destElem;
1136            %(readDest)s
1137            %(op)s
1138            destReg.elements[i] = htog(destElem);
1139        }
1140        ''' % { "op" : op, "readDest" : readDestCode }
1141        for reg in range(4):
1142            eWalkCode += '''
1143            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1144            ''' % { "reg" : reg }
1145        iop = InstObjParams(name, Name,
1146                            "RegRegImmOp",
1147                            { "code": eWalkCode,
1148                              "r_count": 2,
1149                              "predicate_test": predicateTest,
1150                              "op_class": opClass }, [])
1151        header_output += NeonRegRegImmOpDeclare.subst(iop)
1152        exec_output += NeonUnequalRegExecute.subst(iop)
1153        for type in types:
1154            substDict = { "targs" : type,
1155                          "class_name" : Name }
1156            exec_output += NeonExecDeclare.subst(substDict)
1157
1158    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1159        global header_output, exec_output
1160        eWalkCode = simdEnabledCheckCode + '''
1161        RegVect srcReg1, destReg;
1162        '''
1163        for reg in range(rCount):
1164            eWalkCode += '''
1165                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1166            ''' % { "reg" : reg }
1167            if readDest:
1168                eWalkCode += '''
1169                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1170                ''' % { "reg" : reg }
1171        readDestCode = ''
1172        if readDest:
1173            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1174        eWalkCode += '''
1175        for (unsigned i = 0; i < eCount; i++) {
1176            unsigned j = i;
1177            Element srcElem1 = gtoh(srcReg1.elements[i]);
1178            Element destElem;
1179            %(readDest)s
1180            %(op)s
1181            destReg.elements[j] = htog(destElem);
1182        }
1183        ''' % { "op" : op, "readDest" : readDestCode }
1184        for reg in range(rCount):
1185            eWalkCode += '''
1186            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1187            ''' % { "reg" : reg }
1188        iop = InstObjParams(name, Name,
1189                            "RegRegOp",
1190                            { "code": eWalkCode,
1191                              "r_count": rCount,
1192                              "predicate_test": predicateTest,
1193                              "op_class": opClass }, [])
1194        header_output += NeonRegRegOpDeclare.subst(iop)
1195        exec_output += NeonEqualRegExecute.subst(iop)
1196        for type in types:
1197            substDict = { "targs" : type,
1198                          "class_name" : Name }
1199            exec_output += NeonExecDeclare.subst(substDict)
1200
1201    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1202        global header_output, exec_output
1203        eWalkCode = simdEnabledCheckCode + '''
1204        RegVect srcReg1, destReg;
1205        '''
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1209            ''' % { "reg" : reg }
1210            if readDest:
1211                eWalkCode += '''
1212                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1213                ''' % { "reg" : reg }
1214        readDestCode = ''
1215        if readDest:
1216            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1217        eWalkCode += '''
1218        for (unsigned i = 0; i < eCount; i++) {
1219            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1220            Element destElem;
1221            %(readDest)s
1222            %(op)s
1223            destReg.elements[i] = htog(destElem);
1224        }
1225        ''' % { "op" : op, "readDest" : readDestCode }
1226        for reg in range(rCount):
1227            eWalkCode += '''
1228            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1229            ''' % { "reg" : reg }
1230        iop = InstObjParams(name, Name,
1231                            "RegRegImmOp",
1232                            { "code": eWalkCode,
1233                              "r_count": rCount,
1234                              "predicate_test": predicateTest,
1235                              "op_class": opClass }, [])
1236        header_output += NeonRegRegImmOpDeclare.subst(iop)
1237        exec_output += NeonEqualRegExecute.subst(iop)
1238        for type in types:
1239            substDict = { "targs" : type,
1240                          "class_name" : Name }
1241            exec_output += NeonExecDeclare.subst(substDict)
1242
1243    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1244        global header_output, exec_output
1245        eWalkCode = simdEnabledCheckCode + '''
1246        RegVect srcReg1, destReg;
1247        '''
1248        for reg in range(rCount):
1249            eWalkCode += '''
1250                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1251                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1252            ''' % { "reg" : reg }
1253            if readDest:
1254                eWalkCode += '''
1255                ''' % { "reg" : reg }
1256        readDestCode = ''
1257        if readDest:
1258            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1259        eWalkCode += op
1260        for reg in range(rCount):
1261            eWalkCode += '''
1262            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1263            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1264            ''' % { "reg" : reg }
1265        iop = InstObjParams(name, Name,
1266                            "RegRegOp",
1267                            { "code": eWalkCode,
1268                              "r_count": rCount,
1269                              "predicate_test": predicateTest,
1270                              "op_class": opClass }, [])
1271        header_output += NeonRegRegOpDeclare.subst(iop)
1272        exec_output += NeonEqualRegExecute.subst(iop)
1273        for type in types:
1274            substDict = { "targs" : type,
1275                          "class_name" : Name }
1276            exec_output += NeonExecDeclare.subst(substDict)
1277
1278    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1279            readDest=False, toInt=False):
1280        global header_output, exec_output
1281        eWalkCode = simdEnabledCheckCode + '''
1282        typedef FloatReg FloatVect[rCount];
1283        FloatVect srcRegs1;
1284        '''
1285        if toInt:
1286            eWalkCode += 'RegVect destRegs;\n'
1287        else:
1288            eWalkCode += 'FloatVect destRegs;\n'
1289        for reg in range(rCount):
1290            eWalkCode += '''
1291                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1292            ''' % { "reg" : reg }
1293            if readDest:
1294                if toInt:
1295                    eWalkCode += '''
1296                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1297                    ''' % { "reg" : reg }
1298                else:
1299                    eWalkCode += '''
1300                        destRegs[%(reg)d] = FpDestP%(reg)d;
1301                    ''' % { "reg" : reg }
1302        readDestCode = ''
1303        if readDest:
1304            readDestCode = 'destReg = destRegs[i];'
1305        destType = 'FloatReg'
1306        writeDest = 'destRegs[r] = destReg;'
1307        if toInt:
1308            destType = 'FloatRegBits'
1309            writeDest = 'destRegs.regs[r] = destReg;'
1310        eWalkCode += '''
1311        for (unsigned r = 0; r < rCount; r++) {
1312            FloatReg srcReg1 = srcRegs1[r];
1313            %(destType)s destReg;
1314            %(readDest)s
1315            %(op)s
1316            %(writeDest)s
1317        }
1318        ''' % { "op" : op,
1319                "readDest" : readDestCode,
1320                "destType" : destType,
1321                "writeDest" : writeDest }
1322        for reg in range(rCount):
1323            if toInt:
1324                eWalkCode += '''
1325                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1326                ''' % { "reg" : reg }
1327            else:
1328                eWalkCode += '''
1329                FpDestP%(reg)d = destRegs[%(reg)d];
1330                ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "FpRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": rCount,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegOpDeclare.subst(iop)
1338        exec_output += NeonEqualRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1345        global header_output, exec_output
1346        eWalkCode = simdEnabledCheckCode + '''
1347        RegVect srcRegs;
1348        BigRegVect destReg;
1349        '''
1350        for reg in range(rCount):
1351            eWalkCode += '''
1352                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1353            ''' % { "reg" : reg }
1354            if readDest:
1355                eWalkCode += '''
1356                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1357                ''' % { "reg" : reg }
1358        readDestCode = ''
1359        if readDest:
1360            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1361        eWalkCode += '''
1362        for (unsigned i = 0; i < eCount / 2; i++) {
1363            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1364            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1365            BigElement destElem;
1366            %(readDest)s
1367            %(op)s
1368            destReg.elements[i] = htog(destElem);
1369        }
1370        ''' % { "op" : op, "readDest" : readDestCode }
1371        for reg in range(rCount):
1372            eWalkCode += '''
1373            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1374            ''' % { "reg" : reg }
1375        iop = InstObjParams(name, Name,
1376                            "RegRegOp",
1377                            { "code": eWalkCode,
1378                              "r_count": rCount,
1379                              "predicate_test": predicateTest,
1380                              "op_class": opClass }, [])
1381        header_output += NeonRegRegOpDeclare.subst(iop)
1382        exec_output += NeonUnequalRegExecute.subst(iop)
1383        for type in types:
1384            substDict = { "targs" : type,
1385                          "class_name" : Name }
1386            exec_output += NeonExecDeclare.subst(substDict)
1387
1388    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1389        global header_output, exec_output
1390        eWalkCode = simdEnabledCheckCode + '''
1391        BigRegVect srcReg1;
1392        RegVect destReg;
1393        '''
1394        for reg in range(4):
1395            eWalkCode += '''
1396                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1397            ''' % { "reg" : reg }
1398        if readDest:
1399            for reg in range(2):
1400                eWalkCode += '''
1401                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1402                ''' % { "reg" : reg }
1403        readDestCode = ''
1404        if readDest:
1405            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1406        eWalkCode += '''
1407        for (unsigned i = 0; i < eCount; i++) {
1408            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1409            Element destElem;
1410            %(readDest)s
1411            %(op)s
1412            destReg.elements[i] = htog(destElem);
1413        }
1414        ''' % { "op" : op, "readDest" : readDestCode }
1415        for reg in range(2):
1416            eWalkCode += '''
1417            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1418            ''' % { "reg" : reg }
1419        iop = InstObjParams(name, Name,
1420                            "RegRegOp",
1421                            { "code": eWalkCode,
1422                              "r_count": 2,
1423                              "predicate_test": predicateTest,
1424                              "op_class": opClass }, [])
1425        header_output += NeonRegRegOpDeclare.subst(iop)
1426        exec_output += NeonUnequalRegExecute.subst(iop)
1427        for type in types:
1428            substDict = { "targs" : type,
1429                          "class_name" : Name }
1430            exec_output += NeonExecDeclare.subst(substDict)
1431
1432    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1433        global header_output, exec_output
1434        eWalkCode = simdEnabledCheckCode + '''
1435        RegVect destReg;
1436        '''
1437        if readDest:
1438            for reg in range(rCount):
1439                eWalkCode += '''
1440                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1441                ''' % { "reg" : reg }
1442        readDestCode = ''
1443        if readDest:
1444            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1445        eWalkCode += '''
1446        for (unsigned i = 0; i < eCount; i++) {
1447            Element destElem;
1448            %(readDest)s
1449            %(op)s
1450            destReg.elements[i] = htog(destElem);
1451        }
1452        ''' % { "op" : op, "readDest" : readDestCode }
1453        for reg in range(rCount):
1454            eWalkCode += '''
1455            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1456            ''' % { "reg" : reg }
1457        iop = InstObjParams(name, Name,
1458                            "RegImmOp",
1459                            { "code": eWalkCode,
1460                              "r_count": rCount,
1461                              "predicate_test": predicateTest,
1462                              "op_class": opClass }, [])
1463        header_output += NeonRegImmOpDeclare.subst(iop)
1464        exec_output += NeonEqualRegExecute.subst(iop)
1465        for type in types:
1466            substDict = { "targs" : type,
1467                          "class_name" : Name }
1468            exec_output += NeonExecDeclare.subst(substDict)
1469
1470    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1471        global header_output, exec_output
1472        eWalkCode = simdEnabledCheckCode + '''
1473        RegVect srcReg1;
1474        BigRegVect destReg;
1475        '''
1476        for reg in range(2):
1477            eWalkCode += '''
1478                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1479            ''' % { "reg" : reg }
1480        if readDest:
1481            for reg in range(4):
1482                eWalkCode += '''
1483                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1484                ''' % { "reg" : reg }
1485        readDestCode = ''
1486        if readDest:
1487            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1488        eWalkCode += '''
1489        for (unsigned i = 0; i < eCount; i++) {
1490            Element srcElem1 = gtoh(srcReg1.elements[i]);
1491            BigElement destElem;
1492            %(readDest)s
1493            %(op)s
1494            destReg.elements[i] = htog(destElem);
1495        }
1496        ''' % { "op" : op, "readDest" : readDestCode }
1497        for reg in range(4):
1498            eWalkCode += '''
1499            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1500            ''' % { "reg" : reg }
1501        iop = InstObjParams(name, Name,
1502                            "RegRegOp",
1503                            { "code": eWalkCode,
1504                              "r_count": 2,
1505                              "predicate_test": predicateTest,
1506                              "op_class": opClass }, [])
1507        header_output += NeonRegRegOpDeclare.subst(iop)
1508        exec_output += NeonUnequalRegExecute.subst(iop)
1509        for type in types:
1510            substDict = { "targs" : type,
1511                          "class_name" : Name }
1512            exec_output += NeonExecDeclare.subst(substDict)
1513
1514    vhaddCode = '''
1515        Element carryBit =
1516            (((unsigned)srcElem1 & 0x1) +
1517             ((unsigned)srcElem2 & 0x1)) >> 1;
1518        // Use division instead of a shift to ensure the sign extension works
1519        // right. The compiler will figure out if it can be a shift. Mask the
1520        // inputs so they get truncated correctly.
1521        destElem = (((srcElem1 & ~(Element)1) / 2) +
1522                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1523    '''
1524    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1525    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1526
1527    vrhaddCode = '''
1528        Element carryBit =
1529            (((unsigned)srcElem1 & 0x1) +
1530             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1531        // Use division instead of a shift to ensure the sign extension works
1532        // right. The compiler will figure out if it can be a shift. Mask the
1533        // inputs so they get truncated correctly.
1534        destElem = (((srcElem1 & ~(Element)1) / 2) +
1535                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1536    '''
1537    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1538    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1539
# VHSUB -- halving subtract. The sources are halved separately (low bit
# masked off) and subtracted; barrowBit (the borrow) is 1 only when the low
# bit of srcElem1 is 0 and the low bit of srcElem2 is 1, in which case one
# more is subtracted. The result is floor((srcElem1 - srcElem2) / 2).
# Registered for every type in allTypes as VhsubD and VhsubQ.
1540    vhsubCode = '''
1541        Element barrowBit =
1542            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1543        // Use division instead of a shift to ensure the sign extension works
1544        // right. The compiler will figure out if it can be a shift. Mask the
1545        // inputs so they get truncated correctly.
1546        destElem = (((srcElem1 & ~(Element)1) / 2) -
1547                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1548    '''
1549    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1550    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1551
# VAND -- bitwise AND of the two source elements. Only the unsigned element
# types are instantiated (VandD, VandQ).
1552    vandCode = '''
1553        destElem = srcElem1 & srcElem2;
1554    '''
1555    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1556    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1557
# VBIC -- bit clear: srcElem1 with every bit that is set in srcElem2 cleared.
# Only the unsigned element types are instantiated (VbicD, VbicQ).
1558    vbicCode = '''
1559        destElem = srcElem1 & ~srcElem2;
1560    '''
1561    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1562    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1563
# VORR -- bitwise OR of the two source elements (VorrD, VorrQ).
# The same snippet is also registered under the mnemonic "vmov" (VmovD,
# VmovQ) with the SimdMiscOp class.
# NOTE(review): presumably the decoder selects the vmov classes when both
# source registers are the same, so that x | x acts as a move -- confirm
# against the decoder, which is outside this chunk.
1564    vorrCode = '''
1565        destElem = srcElem1 | srcElem2;
1566    '''
1567    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1568    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1569
1570    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1571    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1572
# VORN -- bitwise OR of srcElem1 with the complement of srcElem2.
# Only the unsigned element types are instantiated (VornD, VornQ).
1573    vornCode = '''
1574        destElem = srcElem1 | ~srcElem2;
1575    '''
1576    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1577    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1578
# VEOR -- bitwise exclusive OR of the two source elements.
# Only the unsigned element types are instantiated (VeorD, VeorQ).
1579    veorCode = '''
1580        destElem = srcElem1 ^ srcElem2;
1581    '''
1582    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1583    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1584
# Bitwise select family. All three snippets read the previous value of
# destElem as an input, and every registration passes an extra trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against threeEqualRegInst (defined outside this chunk).
#   VBIF: keep destElem where srcElem2 has a 1 bit, take srcElem1 where it
#         has a 0 bit (insert if false).
#   VBIT: take srcElem1 where srcElem2 has a 1 bit, keep destElem elsewhere
#         (insert if true).
#   VBSL: the old destElem is the selector: 1 bits pick srcElem1, 0 bits pick
#         srcElem2.
1585    vbifCode = '''
1586        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1587    '''
1588    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1589    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1590    vbitCode = '''
1591        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1592    '''
1593    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1594    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1595    vbslCode = '''
1596        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1597    '''
1598    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1599    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1600
# VMAX (integer) -- element-wise maximum; on a tie srcElem2 is returned.
# Registered for every type in allTypes as VmaxD and VmaxQ. The same snippet
# is reused further down for the pairwise "vpmax" registration.
1601    vmaxCode = '''
1602        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1603    '''
1604    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1605    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1606
# VMIN (integer) -- element-wise minimum; on a tie srcElem2 is returned.
# Registered for every type in allTypes as VminD and VminQ. The same snippet
# is reused further down for the pairwise "vpmin" registration.
1607    vminCode = '''
1608        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1609    '''
1610    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1611    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1612
# Integer add family.
#   vaddCode    plain wrapping add. Used for VADD (NVaddD, NVaddQ) and, with
#               pairwise=True over smallUnsignedTypes, for VPADD (NVpaddD).
#   vaddlwCode  both operands are cast to BigElement before adding. Shared by
#               VADDL (threeRegLongInst) and VADDW (threeRegWideInst).
#   vaddhnCode  the sum is formed in BigElement and shifted right by the bit
#               width of Element, i.e. only the upper part is kept.
#   vraddhnCode as vaddhnCode, but 1 << (width - 1) is added before the
#               shift, which rounds the discarded lower part.
# NOTE(review): which of the operand/result widths Element and BigElement
# denote in the long/wide/narrow helpers is decided by templates outside this
# chunk -- confirm there.
1613    vaddCode = '''
1614        destElem = srcElem1 + srcElem2;
1615    '''
1616    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1617    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1618
1619    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
1620                      2, vaddCode, pairwise=True)
1621    vaddlwCode = '''
1622        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1623    '''
1624    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1625    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1626    vaddhnCode = '''
1627        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1628                   (sizeof(Element) * 8);
1629    '''
1630    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1631    vraddhnCode = '''
1632        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1633                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1634                   (sizeof(Element) * 8);
1635    '''
1636    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1637
# Integer subtract.
#   vsubCode    plain wrapping subtract, VSUB (NVsubD, NVsubQ).
#   vsublwCode  both operands are cast to BigElement before subtracting.
#               Shared by VSUBL (threeRegLongInst) and VSUBW
#               (threeRegWideInst).
1638    vsubCode = '''
1639        destElem = srcElem1 - srcElem2;
1640    '''
1641    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1642    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1643    vsublwCode = '''
1644        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1645    '''
1646    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1647    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1648
# VQADD (unsigned) -- saturating add. The wrapped sum is smaller than an
# operand exactly when the addition carried out of the element; in that case
# the result is forced to all ones ((Element)(-1)) and the qc field of the
# local FPSCR copy is set. FpscrQc is read at the start and written back at
# the end whether or not saturation happened.
1649    vqaddUCode = '''
1650        destElem = srcElem1 + srcElem2;
1651        FPSCR fpscr = (FPSCR) FpscrQc;
1652        if (destElem < srcElem1 || destElem < srcElem2) {
1653            destElem = (Element)(-1);
1654            fpscr.qc = 1;
1655        }
1656        FpscrQc = fpscr;
1657    '''
1658    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1659    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
# Narrowing subtracts.
#   vsubhnCode   the difference is formed in BigElement and shifted right by
#                the bit width of Element, keeping only the upper part.
#   vrsubhnCode  as above, but 1 << (width - 1) is added before the shift,
#                which rounds the discarded lower part.
1660    vsubhnCode = '''
1661        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1662                   (sizeof(Element) * 8);
1663    '''
1664    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1665    vrsubhnCode = '''
1666        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1667                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1668                   (sizeof(Element) * 8);
1669    '''
1670    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1671
# VQADD (signed) -- saturating add. Overflow is recognised when both sources
# have the same sign and the wrapped sum has the opposite sign. destElem is
# then set to (Element)1 << (width - 1), the most negative value; if the
# wrapped sum was negative (the true sum overflowed upwards) one is
# subtracted, which yields the most positive value. The qc field is set on
# saturation and FpscrQc is always written back.
1672    vqaddSCode = '''
1673        destElem = srcElem1 + srcElem2;
1674        FPSCR fpscr = (FPSCR) FpscrQc;
1675        bool negDest = (destElem < 0);
1676        bool negSrc1 = (srcElem1 < 0);
1677        bool negSrc2 = (srcElem2 < 0);
1678        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1679            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1680            if (negDest)
1681                destElem -= 1;
1682            fpscr.qc = 1;
1683        }
1684        FpscrQc = fpscr;
1685    '''
1686    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1687    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1688
# VQSUB (unsigned) -- saturating subtract. A wrapped difference larger than
# srcElem1 means the subtraction borrowed; the result is then clamped to 0
# and the qc field is set. FpscrQc is always written back.
1689    vqsubUCode = '''
1690        destElem = srcElem1 - srcElem2;
1691        FPSCR fpscr = (FPSCR) FpscrQc;
1692        if (destElem > srcElem1) {
1693            destElem = 0;
1694            fpscr.qc = 1;
1695        }
1696        FpscrQc = fpscr;
1697    '''
1698    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1699    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1700
# VQSUB (signed) -- saturating subtract. Overflow is only possible when the
# sources have opposite signs (negSrc1 == posSrc2); it is recognised when, in
# that situation, the wrapped difference has a different sign from srcElem1.
# destElem is then set to the most negative value, and reduced by one to the
# most positive value when the wrapped difference was negative. The qc field
# is set on saturation and FpscrQc is always written back.
1701    vqsubSCode = '''
1702        destElem = srcElem1 - srcElem2;
1703        FPSCR fpscr = (FPSCR) FpscrQc;
1704        bool negDest = (destElem < 0);
1705        bool negSrc1 = (srcElem1 < 0);
1706        bool posSrc2 = (srcElem2 >= 0);
1707        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1708            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1709            if (negDest)
1710                destElem -= 1;
1711            fpscr.qc = 1;
1712        }
1713        FpscrQc = fpscr;
1714    '''
1715    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1716    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1717
# VCGT (integer) -- compare greater-than. The result element is all ones
# ((Element)(-1)) when srcElem1 > srcElem2 and 0 otherwise.
1718    vcgtCode = '''
1719        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1720    '''
1721    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1722    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1723
# VCGE (integer) -- compare greater-than-or-equal. The result element is all
# ones when srcElem1 >= srcElem2 and 0 otherwise.
1724    vcgeCode = '''
1725        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1726    '''
1727    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1728    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1729
# VCEQ (integer) -- compare equal. The result element is all ones when the
# sources are equal and 0 otherwise. Only unsigned types are instantiated.
1730    vceqCode = '''
1731        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1732    '''
1733    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1734    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1735
# VSHL (register) -- shift srcElem1 by a signed amount taken from the low
# byte of srcElem2 (the (int8_t) cast).
#   shiftAmt < 0 : right shift by -shiftAmt. When the magnitude is at least
#                  the element width the result starts as 0 and shiftAmt is
#                  clamped to width - 1 so that the sign fix-up below ORs in
#                  -(1 << 0), i.e. all ones, for negative inputs. The fix-up
#                  forces the vacated upper bits to 1 whenever the source is
#                  negative but the shifted value is not.
#   shiftAmt >= 0: left shift; amounts of at least the element width give 0.
# NOTE(review): ltz() is defined outside this chunk; it is presumably a
# "less than zero" test that is also valid for unsigned Element types.
1736    vshlCode = '''
1737        int16_t shiftAmt = (int8_t)srcElem2;
1738        if (shiftAmt < 0) {
1739            shiftAmt = -shiftAmt;
1740            if (shiftAmt >= sizeof(Element) * 8) {
1741                shiftAmt = sizeof(Element) * 8 - 1;
1742                destElem = 0;
1743            } else {
1744                destElem = (srcElem1 >> shiftAmt);
1745            }
1746            // Make sure the right shift sign extended when it should.
1747            if (ltz(srcElem1) && !ltz(destElem)) {
1748                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1749                                             1 - shiftAmt));
1750            }
1751        } else {
1752            if (shiftAmt >= sizeof(Element) * 8) {
1753                destElem = 0;
1754            } else {
1755                destElem = srcElem1 << shiftAmt;
1756            }
1757        }
1758    '''
1759    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1760    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1761
# VRSHL -- rounding shift by a signed amount taken from the low byte of
# srcElem2.
#   shiftAmt < 0 : right shift with the same width clamp and sign fix-up as
#                  the plain register shift, then rBit is added. rBit is the
#                  last bit shifted out (bit shiftAmt - 1 of srcElem1) while
#                  the shift does not exceed the element width, and is forced
#                  to 1 when a negative value is shifted by more than the
#                  width (so the all-ones intermediate rounds to 0).
#   shiftAmt > 0 : left shift; amounts of at least the element width give 0.
#   shiftAmt == 0: the source is copied unchanged.
# NOTE(review): these classes use SimdAluOp whereas the plain VSHL classes
# use SimdShiftOp -- confirm that the difference is intended.
1762    vrshlCode = '''
1763        int16_t shiftAmt = (int8_t)srcElem2;
1764        if (shiftAmt < 0) {
1765            shiftAmt = -shiftAmt;
1766            Element rBit = 0;
1767            if (shiftAmt <= sizeof(Element) * 8)
1768                rBit = bits(srcElem1, shiftAmt - 1);
1769            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1770                rBit = 1;
1771            if (shiftAmt >= sizeof(Element) * 8) {
1772                shiftAmt = sizeof(Element) * 8 - 1;
1773                destElem = 0;
1774            } else {
1775                destElem = (srcElem1 >> shiftAmt);
1776            }
1777            // Make sure the right shift sign extended when it should.
1778            if (ltz(srcElem1) && !ltz(destElem)) {
1779                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1780                                             1 - shiftAmt));
1781            }
1782            destElem += rBit;
1783        } else if (shiftAmt > 0) {
1784            if (shiftAmt >= sizeof(Element) * 8) {
1785                destElem = 0;
1786            } else {
1787                destElem = srcElem1 << shiftAmt;
1788            }
1789        } else {
1790            destElem = srcElem1;
1791        }
1792    '''
1793    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1794    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1795
# VQSHL (unsigned, register) -- saturating shift by a signed amount taken
# from the low byte of srcElem2.
#   shiftAmt < 0 : plain right shift; magnitudes of at least the element
#                  width give 0. No saturation is possible here.
#   shiftAmt > 0 : left shift. If the shift is at least the element width and
#                  the source is non-zero, or if any of the top shiftAmt bits
#                  of the source is set, the result saturates to
#                  mask(width) and the qc field is set.
#   shiftAmt == 0: the source is copied unchanged.
# FpscrQc is read at the start and always written back.
1796    vqshlUCode = '''
1797        int16_t shiftAmt = (int8_t)srcElem2;
1798        FPSCR fpscr = (FPSCR) FpscrQc;
1799        if (shiftAmt < 0) {
1800            shiftAmt = -shiftAmt;
1801            if (shiftAmt >= sizeof(Element) * 8) {
1802                shiftAmt = sizeof(Element) * 8 - 1;
1803                destElem = 0;
1804            } else {
1805                destElem = (srcElem1 >> shiftAmt);
1806            }
1807        } else if (shiftAmt > 0) {
1808            if (shiftAmt >= sizeof(Element) * 8) {
1809                if (srcElem1 != 0) {
1810                    destElem = mask(sizeof(Element) * 8);
1811                    fpscr.qc = 1;
1812                } else {
1813                    destElem = 0;
1814                }
1815            } else {
1816                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1817                            sizeof(Element) * 8 - shiftAmt)) {
1818                    destElem = mask(sizeof(Element) * 8);
1819                    fpscr.qc = 1;
1820                } else {
1821                    destElem = srcElem1 << shiftAmt;
1822                }
1823            }
1824        } else {
1825            destElem = srcElem1;
1826        }
1827        FpscrQc = fpscr;
1828    '''
1829    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1830    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1831
# VQSHL (signed, register) -- saturating shift by a signed amount taken from
# the low byte of srcElem2.
#   shiftAmt < 0 : arithmetic right shift; the width clamp plus the fix-up
#                  after it make negative inputs fill with ones.
#   shiftAmt > 0 : left shift. It saturates when the shift is at least the
#                  element width and the source is non-zero, or when the top
#                  shiftAmt + 1 bits of the source are not all copies of the
#                  sign (compared against mask(shiftAmt + 1) for negative
#                  sources and 0 otherwise). The saturated value is
#                  mask(width - 1), inverted for negative sources; the qc
#                  field is set.
#   shiftAmt == 0: the source is copied unchanged.
# NOTE(review): registered with SimdCmpOp while the unsigned VQSHL classes
# use SimdAluOp -- confirm that the difference is intended.
1832    vqshlSCode = '''
1833        int16_t shiftAmt = (int8_t)srcElem2;
1834        FPSCR fpscr = (FPSCR) FpscrQc;
1835        if (shiftAmt < 0) {
1836            shiftAmt = -shiftAmt;
1837            if (shiftAmt >= sizeof(Element) * 8) {
1838                shiftAmt = sizeof(Element) * 8 - 1;
1839                destElem = 0;
1840            } else {
1841                destElem = (srcElem1 >> shiftAmt);
1842            }
1843            // Make sure the right shift sign extended when it should.
1844            if (srcElem1 < 0 && destElem >= 0) {
1845                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1846                                             1 - shiftAmt));
1847            }
1848        } else if (shiftAmt > 0) {
1849            bool sat = false;
1850            if (shiftAmt >= sizeof(Element) * 8) {
1851                if (srcElem1 != 0)
1852                    sat = true;
1853                else
1854                    destElem = 0;
1855            } else {
1856                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1857                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1858                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1859                    sat = true;
1860                } else {
1861                    destElem = srcElem1 << shiftAmt;
1862                }
1863            }
1864            if (sat) {
1865                fpscr.qc = 1;
1866                destElem = mask(sizeof(Element) * 8 - 1);
1867                if (srcElem1 < 0)
1868                    destElem = ~destElem;
1869            }
1870        } else {
1871            destElem = srcElem1;
1872        }
1873        FpscrQc = fpscr;
1874    '''
1875    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1876    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1877
1878    vqrshlUCode = '''
1879        int16_t shiftAmt = (int8_t)srcElem2;
1880        FPSCR fpscr = (FPSCR) FpscrQc;
1881        if (shiftAmt < 0) {
1882            shiftAmt = -shiftAmt;
1883            Element rBit = 0;
1884            if (shiftAmt <= sizeof(Element) * 8)
1885                rBit = bits(srcElem1, shiftAmt - 1);
1886            if (shiftAmt >= sizeof(Element) * 8) {
1887                shiftAmt = sizeof(Element) * 8 - 1;
1888                destElem = 0;
1889            } else {
1890                destElem = (srcElem1 >> shiftAmt);
1891            }
1892            destElem += rBit;
1893        } else {
1894            if (shiftAmt >= sizeof(Element) * 8) {
1895                if (srcElem1 != 0) {
1896                    destElem = mask(sizeof(Element) * 8);
1897                    fpscr.qc = 1;
1898                } else {
1899                    destElem = 0;
1900                }
1901            } else {
1902                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1903                            sizeof(Element) * 8 - shiftAmt)) {
1904                    destElem = mask(sizeof(Element) * 8);
1905                    fpscr.qc = 1;
1906                } else {
1907                    destElem = srcElem1 << shiftAmt;
1908                }
1909            }
1910        }
1911        FpscrQc = fpscr;
1912    '''
1913    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1914    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1915
# VQRSHL (signed) -- saturating, rounding shift by a signed amount taken from
# the low byte of srcElem2.
#   shiftAmt < 0 : arithmetic right shift (width clamp plus sign fix-up),
#                  then rBit is added. rBit is the last bit shifted out while
#                  the shift does not exceed the element width, and is forced
#                  to 1 when a negative value is shifted by more than the
#                  width.
#   shiftAmt > 0 : left shift. It saturates when the shift is at least the
#                  element width and the source is non-zero, or when the top
#                  shiftAmt + 1 bits of the source are not all copies of the
#                  sign. The saturated value is mask(width - 1), inverted for
#                  negative sources; the qc field is set.
#   shiftAmt == 0: the source is copied unchanged.
# FpscrQc is read at the start and always written back.
1916    vqrshlSCode = '''
1917        int16_t shiftAmt = (int8_t)srcElem2;
1918        FPSCR fpscr = (FPSCR) FpscrQc;
1919        if (shiftAmt < 0) {
1920            shiftAmt = -shiftAmt;
1921            Element rBit = 0;
1922            if (shiftAmt <= sizeof(Element) * 8)
1923                rBit = bits(srcElem1, shiftAmt - 1);
1924            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1925                rBit = 1;
1926            if (shiftAmt >= sizeof(Element) * 8) {
1927                shiftAmt = sizeof(Element) * 8 - 1;
1928                destElem = 0;
1929            } else {
1930                destElem = (srcElem1 >> shiftAmt);
1931            }
1932            // Make sure the right shift sign extended when it should.
1933            if (srcElem1 < 0 && destElem >= 0) {
1934                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1935                                             1 - shiftAmt));
1936            }
1937            destElem += rBit;
1938        } else if (shiftAmt > 0) {
1939            bool sat = false;
1940            if (shiftAmt >= sizeof(Element) * 8) {
1941                if (srcElem1 != 0)
1942                    sat = true;
1943                else
1944                    destElem = 0;
1945            } else {
1946                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1947                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1948                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1949                    sat = true;
1950                } else {
1951                    destElem = srcElem1 << shiftAmt;
1952                }
1953            }
1954            if (sat) {
1955                fpscr.qc = 1;
1956                destElem = mask(sizeof(Element) * 8 - 1);
1957                if (srcElem1 < 0)
1958                    destElem = ~destElem;
1959            }
1960        } else {
1961            destElem = srcElem1;
1962        }
1963        FpscrQc = fpscr;
1964    '''
1965    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1966    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1967
# Absolute difference and accumulate. Both snippets add |srcElem1 - srcElem2|
# to the existing destElem, always subtracting the smaller operand from the
# larger one. Every registration passes a trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against the helpers defined outside this chunk.
#   vabaCode   same-width form (VabaD, VabaQ).
#   vabalCode  operands are cast to BigElement before subtracting (Vabal).
1968    vabaCode = '''
1969        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1970                                            (srcElem2 - srcElem1);
1971    '''
1972    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1973    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1974    vabalCode = '''
1975        destElem += (srcElem1 > srcElem2) ?
1976            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1977            ((BigElement)srcElem2 - (BigElement)srcElem1);
1978    '''
1979    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1980
# Absolute difference. Both snippets compute |srcElem1 - srcElem2| by
# subtracting the smaller operand from the larger one.
#   vabdCode   same-width form (VabdD, VabdQ).
#   vabdlCode  operands are cast to BigElement before subtracting (Vabdl).
1981    vabdCode = '''
1982        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1983                                           (srcElem2 - srcElem1);
1984    '''
1985    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1986    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1987    vabdlCode = '''
1988        destElem = (srcElem1 > srcElem2) ?
1989            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1990            ((BigElement)srcElem2 - (BigElement)srcElem1);
1991    '''
1992    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1993
# VTST -- test bits. The result element is all ones when the two sources
# share at least one set bit and 0 otherwise.
1994    vtstCode = '''
1995        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
1996    '''
1997    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
1998    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
1999
# Integer multiply.
#   vmulCode   same-width product; the result is whatever fits in Element
#              (NVmulD, NVmulQ).
#   vmullCode  both operands are cast to BigElement first, so the full
#              product is kept (Vmull).
2000    vmulCode = '''
2001        destElem = srcElem1 * srcElem2;
2002    '''
2003    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2004    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2005    vmullCode = '''
2006        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2007    '''
2008    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2009
# Integer multiply-accumulate: the product of the sources is added to the
# existing destElem. Every registration passes a trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against the helpers defined outside this chunk.
#   vmlaCode   same-width form (NVmlaD, NVmlaQ).
#   vmlalCode  operands are cast to BigElement before multiplying (Vmlal).
2010    vmlaCode = '''
2011        destElem = destElem + srcElem1 * srcElem2;
2012    '''
2013    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2014    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2015    vmlalCode = '''
2016        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2017    '''
2018    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2019
2020    vqdmlalCode = '''
2021        FPSCR fpscr = (FPSCR) FpscrQc;
2022        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2023        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2024        Element halfNeg = maxNeg / 2;
2025        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2026            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2027            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2028            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2029            fpscr.qc = 1;
2030        }
2031        bool negPreDest = ltz(destElem);
2032        destElem += midElem;
2033        bool negDest = ltz(destElem);
2034        bool negMid = ltz(midElem);
2035        if (negPreDest == negMid && negMid != negDest) {
2036            destElem = mask(sizeof(BigElement) * 8 - 1);
2037            if (negPreDest)
2038                destElem = ~destElem;
2039            fpscr.qc = 1;
2040        }
2041        FpscrQc = fpscr;
2042    '''
2043    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2044
2045    vqdmlslCode = '''
2046        FPSCR fpscr = (FPSCR) FpscrQc;
2047        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2048        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2049        Element halfNeg = maxNeg / 2;
2050        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2051            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2052            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2053            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2054            fpscr.qc = 1;
2055        }
2056        bool negPreDest = ltz(destElem);
2057        destElem -= midElem;
2058        bool negDest = ltz(destElem);
2059        bool posMid = ltz((BigElement)-midElem);
2060        if (negPreDest == posMid && posMid != negDest) {
2061            destElem = mask(sizeof(BigElement) * 8 - 1);
2062            if (negPreDest)
2063                destElem = ~destElem;
2064            fpscr.qc = 1;
2065        }
2066        FpscrQc = fpscr;
2067    '''
2068    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2069
# VQDMULL -- saturating doubling multiply long. destElem receives
# 2 * srcElem1 * srcElem2. When both sources equal (Element)1 << (width - 1)
# (the most negative value) the doubled product is replaced by
# ~((BigElement)srcElem1 << width) and the qc field is set. FpscrQc is read
# at the start and always written back.
2070    vqdmullCode = '''
2071        FPSCR fpscr = (FPSCR) FpscrQc;
2072        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2073        if (srcElem1 == srcElem2 &&
2074                srcElem1 == (Element)((Element)1 <<
2075                    (Element)(sizeof(Element) * 8 - 1))) {
2076            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2077            fpscr.qc = 1;
2078        }
2079        FpscrQc = fpscr;
2080    '''
2081    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2082
# Integer multiply-subtract: the product of the sources is subtracted from
# the existing destElem. Every registration passes a trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against the helpers defined outside this chunk.
#   vmlsCode   same-width form (NVmlsD, NVmlsQ).
#   vmlslCode  operands are cast to BigElement before multiplying (Vmlsl).
2083    vmlsCode = '''
2084        destElem = destElem - srcElem1 * srcElem2;
2085    '''
2086    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2087    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2088    vmlslCode = '''
2089        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2090    '''
2091    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2092
# Polynomial (carry-less) multiply: for every set bit j of srcElem2 a copy of
# srcElem1 shifted left by j is XORed into the result, so partial products
# are combined without carries.
#   vmulpCode   same-width form; bits shifted beyond Element are lost
#               (NVmulpD, NVmulpQ, registered under the mnemonic "vmul").
#   vmullpCode  srcElem1 is cast to BigElement before shifting, keeping the
#               full product (Vmullp, registered under the mnemonic "vmull").
2093    vmulpCode = '''
2094        destElem = 0;
2095        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2096            if (bits(srcElem2, j))
2097                destElem ^= srcElem1 << j;
2098        }
2099    '''
2100    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2101    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2102    vmullpCode = '''
2103        destElem = 0;
2104        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2105            if (bits(srcElem2, j))
2106                destElem ^= (BigElement)srcElem1 << j;
2107        }
2108    '''
2109    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2110
# VPMAX / VPMIN (integer) -- pairwise maximum and minimum. They reuse the
# vmaxCode / vminCode snippets defined earlier in this block and pass
# pairwise=True; only one class is registered for each (VpmaxD, VpminD),
# over smallTypes.
2111    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2112
2113    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2114
# VQDMULH -- saturating doubling multiply returning the high half. destElem
# is (2 * srcElem1 * srcElem2) >> width. When both sources equal
# (Element)1 << (width - 1) (the most negative value) the result is replaced
# by ~srcElem1 and the qc field is set. FpscrQc is read at the start and
# always written back.
2115    vqdmulhCode = '''
2116        FPSCR fpscr = (FPSCR) FpscrQc;
2117        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2118                   (sizeof(Element) * 8);
2119        if (srcElem1 == srcElem2 &&
2120                srcElem1 == (Element)((Element)1 <<
2121                    (sizeof(Element) * 8 - 1))) {
2122            destElem = ~srcElem1;
2123            fpscr.qc = 1;
2124        }
2125        FpscrQc = fpscr;
2126    '''
2127    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2128    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2129
2130    vqrdmulhCode = '''
2131        FPSCR fpscr = (FPSCR) FpscrQc;
2132        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2133                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2134                   (sizeof(Element) * 8);
2135        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2136        Element halfNeg = maxNeg / 2;
2137        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2138            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2139            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2140            if (destElem < 0) {
2141                destElem = mask(sizeof(Element) * 8 - 1);
2142            } else {
2143                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2144            }
2145            fpscr.qc = 1;
2146        }
2147        FpscrQc = fpscr;
2148    '''
2149    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2150            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2151    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2152            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2153
# VMAX (single-precision float). The snippet works on a local copy of
# FpscrExc and writes it back at the end.
#   1. processNans() is called first; it produces destReg and reports through
#      'done' whether it already settled the result.
#   2. If not done, the result comes from binaryOp() with fpMaxS.
#   3. If done and flushToZero(srcReg1, srcReg2) returns true, the idc field
#      is set.
# NOTE(review): processNans, binaryOp, fpMaxS and flushToZero are defined
# outside this chunk; the meaning of the literal true/true/VfpRoundNearest
# arguments should be confirmed there.
2154    vmaxfpCode = '''
2155        FPSCR fpscr = (FPSCR) FpscrExc;
2156        bool done;
2157        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2158        if (!done) {
2159            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2160                               true, true, VfpRoundNearest);
2161        } else if (flushToZero(srcReg1, srcReg2)) {
2162            fpscr.idc = 1;
2163        }
2164        FpscrExc = fpscr;
2165    '''
2166    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2167    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2168
# VMIN (single-precision float). The snippet works on a local copy of
# FpscrExc and writes it back at the end.
#   1. processNans() is called first; it produces destReg and reports through
#      'done' whether it already settled the result.
#   2. If not done, the result comes from binaryOp() with fpMinS.
#   3. If done and flushToZero(srcReg1, srcReg2) returns true, the idc field
#      is set.
# NOTE(review): processNans, binaryOp, fpMinS and flushToZero are defined
# outside this chunk; the meaning of the literal true/true/VfpRoundNearest
# arguments should be confirmed there.
2169    vminfpCode = '''
2170        FPSCR fpscr = (FPSCR) FpscrExc;
2171        bool done;
2172        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2173        if (!done) {
2174            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2175                               true, true, VfpRoundNearest);
2176        } else if (flushToZero(srcReg1, srcReg2)) {
2177            fpscr.idc = 1;
2178        }
2179        FpscrExc = fpscr;
2180    '''
2181    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2182    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2183
# VPMAX / VPMIN (single-precision float) -- pairwise forms. They reuse the
# vmaxfpCode / vminfpCode snippets defined just above and pass pairwise=True.
# Unlike the integer pairwise registrations, both a D and a Q class are
# created for each mnemonic.
2184    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2185                        2, vmaxfpCode, pairwise=True)
2186    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2187                        4, vmaxfpCode, pairwise=True)
2188
2189    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2190                        2, vminfpCode, pairwise=True)
2191    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2192                        4, vminfpCode, pairwise=True)
2193
# VADD (single-precision float). The sum is produced by binaryOp() with
# fpAddS on a local copy of FpscrExc, which is written back afterwards.
# The same snippet is registered again with pairwise=True for VPADD
# (VpaddDFp, VpaddQFp).
# NOTE(review): binaryOp and fpAddS are defined outside this chunk; confirm
# there what the literal true/true/VfpRoundNearest arguments select.
2194    vaddfpCode = '''
2195        FPSCR fpscr = (FPSCR) FpscrExc;
2196        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2197                           true, true, VfpRoundNearest);
2198        FpscrExc = fpscr;
2199    '''
2200    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2201    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2202
2203    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2204                        2, vaddfpCode, pairwise=True)
2205    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2206                        4, vaddfpCode, pairwise=True)
2207
# VSUB (single-precision float). The difference srcReg1 - srcReg2 is produced
# by binaryOp() with fpSubS on a local copy of FpscrExc, which is written
# back afterwards.
# NOTE(review): binaryOp and fpSubS are defined outside this chunk.
2208    vsubfpCode = '''
2209        FPSCR fpscr = (FPSCR) FpscrExc;
2210        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2211                           true, true, VfpRoundNearest);
2212        FpscrExc = fpscr;
2213    '''
2214    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2215    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2216
# VMUL (single-precision float). The product is produced by binaryOp() with
# fpMulS on a local copy of FpscrExc, which is written back afterwards.
# NOTE(review): binaryOp and fpMulS are defined outside this chunk.
2217    vmulfpCode = '''
2218        FPSCR fpscr = (FPSCR) FpscrExc;
2219        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2220                           true, true, VfpRoundNearest);
2221        FpscrExc = fpscr;
2222    '''
2223    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2224    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2225
# VMLA (single-precision float) -- multiply-accumulate done as two separate
# binaryOp() calls: first mid = srcReg1 * srcReg2 (fpMulS), then
# destReg = mid + destReg (fpAddS). Both calls share the same local FPSCR
# copy, which is written back to FpscrExc at the end. destReg is read as an
# input, and the registrations pass a trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against threeEqualRegInstFp (outside this chunk).
2226    vmlafpCode = '''
2227        FPSCR fpscr = (FPSCR) FpscrExc;
2228        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2229                             true, true, VfpRoundNearest);
2230        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2231                           true, true, VfpRoundNearest);
2232        FpscrExc = fpscr;
2233    '''
2234    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2235    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2236
# VMLS (single-precision float) -- multiply-subtract done as two separate
# binaryOp() calls: first mid = srcReg1 * srcReg2 (fpMulS), then
# destReg = destReg - mid (fpSubS). Both calls share the same local FPSCR
# copy, which is written back to FpscrExc at the end. destReg is read as an
# input, and the registrations pass a trailing True.
# NOTE(review): that True is presumably the helper's "read the destination"
# flag -- confirm against threeEqualRegInstFp (outside this chunk).
2237    vmlsfpCode = '''
2238        FPSCR fpscr = (FPSCR) FpscrExc;
2239        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2240                             true, true, VfpRoundNearest);
2241        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2242                           true, true, VfpRoundNearest);
2243        FpscrExc = fpscr;
2244    '''
2245    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2246    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2247
# VCGT (single-precision float) -- compare greater-than. The comparison is
# delegated to vcgtFunc through binaryOp(), and its float result 'res' is
# decoded here: res == 0 gives -1 in destReg, anything else gives 0, and
# res == 2.0 additionally sets the ioc field. The local FPSCR copy is written
# back to FpscrExc at the end. Registered with toInt = True.
# NOTE(review): vcgtFunc is defined outside this chunk; presumably it returns
# 0 when the comparison holds and 2.0 for an invalid operand -- confirm.
2248    vcgtfpCode = '''
2249        FPSCR fpscr = (FPSCR) FpscrExc;
2250        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2251                             true, true, VfpRoundNearest);
2252        destReg = (res == 0) ? -1 : 0;
2253        if (res == 2.0)
2254            fpscr.ioc = 1;
2255        FpscrExc = fpscr;
2256    '''
2257    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2258            2, vcgtfpCode, toInt = True)
2259    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2260            4, vcgtfpCode, toInt = True)
2261
# VCGE (single-precision float) -- compare greater-than-or-equal. The
# comparison is delegated to vcgeFunc through binaryOp(), and its float
# result 'res' is decoded here: res == 0 gives -1 in destReg, anything else
# gives 0, and res == 2.0 additionally sets the ioc field. The local FPSCR
# copy is written back to FpscrExc at the end. Registered with toInt = True.
# NOTE(review): vcgeFunc is defined outside this chunk; presumably it returns
# 0 when the comparison holds and 2.0 for an invalid operand -- confirm.
2262    vcgefpCode = '''
2263        FPSCR fpscr = (FPSCR) FpscrExc;
2264        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2265                             true, true, VfpRoundNearest);
2266        destReg = (res == 0) ? -1 : 0;
2267        if (res == 2.0)
2268            fpscr.ioc = 1;
2269        FpscrExc = fpscr;
2270    '''
2271    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2272            2, vcgefpCode, toInt = True)
2273    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2274            4, vcgefpCode, toInt = True)
2275
# vacgt (floating point): compare via vacgtFunc (presumably an
# absolute-value greater-than; the helper is defined elsewhere). res == 0
# yields -1 in destReg, otherwise 0; res == 2.0 sets fpscr.ioc.
2276    vacgtfpCode = '''
2277        FPSCR fpscr = (FPSCR) FpscrExc;
2278        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2279                             true, true, VfpRoundNearest);
2280        destReg = (res == 0) ? -1 : 0;
2281        if (res == 2.0)
2282            fpscr.ioc = 1;
2283        FpscrExc = fpscr;
2284    '''
# Sizes 2 and 4 ("D"/"Q" class names), integer-valued destination.
2285    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2286            2, vacgtfpCode, toInt = True)
2287    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2288            4, vacgtfpCode, toInt = True)
2289
# vacge (floating point): compare via vacgeFunc (presumably an
# absolute-value greater-or-equal; the helper is defined elsewhere).
# res == 0 yields -1 in destReg, otherwise 0; res == 2.0 sets fpscr.ioc.
2290    vacgefpCode = '''
2291        FPSCR fpscr = (FPSCR) FpscrExc;
2292        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2293                             true, true, VfpRoundNearest);
2294        destReg = (res == 0) ? -1 : 0;
2295        if (res == 2.0)
2296            fpscr.ioc = 1;
2297        FpscrExc = fpscr;
2298    '''
# Sizes 2 and 4 ("D"/"Q" class names), integer-valued destination.
2299    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2300            2, vacgefpCode, toInt = True)
2301    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2302            4, vacgefpCode, toInt = True)
2303
# vceq (floating point, register vs. register): compare via vceqFunc.
# res == 0 yields -1 in destReg, otherwise 0; res == 2.0 sets fpscr.ioc.
2304    vceqfpCode = '''
2305        FPSCR fpscr = (FPSCR) FpscrExc;
2306        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2307                             true, true, VfpRoundNearest);
2308        destReg = (res == 0) ? -1 : 0;
2309        if (res == 2.0)
2310            fpscr.ioc = 1;
2311        FpscrExc = fpscr;
2312    '''
# Sizes 2 and 4 ("D"/"Q" class names), integer-valued destination.
2313    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2314            2, vceqfpCode, toInt = True)
2315    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2316            4, vceqfpCode, toInt = True)
2317
# vrecps: single binaryOp call with fpRecpsS on srcReg1/srcReg2; the
# result goes straight to destReg. The arithmetic itself is in fpRecpsS,
# which is defined outside this section.
2318    vrecpsCode = '''
2319        FPSCR fpscr = (FPSCR) FpscrExc;
2320        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2321                           true, true, VfpRoundNearest);
2322        FpscrExc = fpscr;
2323    '''
# No trailing True here, unlike vmla/vmls: the snippet never reads destReg.
2324    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2325    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2326
# vrsqrts: single binaryOp call with fpRSqrtsS on srcReg1/srcReg2; the
# result goes straight to destReg. The arithmetic itself is in fpRSqrtsS,
# which is defined outside this section.
2327    vrsqrtsCode = '''
2328        FPSCR fpscr = (FPSCR) FpscrExc;
2329        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2330                           true, true, VfpRoundNearest);
2331        FpscrExc = fpscr;
2332    '''
# Registered under the "SimdFloatMiscOp" op class, sizes 2 and 4.
2333    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2334    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2335
# vabd (floating point): subtract srcReg2 from srcReg1 through binaryOp
# with fpSubS, then take fabs() of the difference. Only the subtraction
# goes through binaryOp; fabs is applied to its result directly.
2336    vabdfpCode = '''
2337        FPSCR fpscr = (FPSCR) FpscrExc;
2338        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2339                             true, true, VfpRoundNearest);
2340        destReg = fabs(mid);
2341        FpscrExc = fpscr;
2342    '''
# Registered under the "SimdFloatAddOp" op class, sizes 2 and 4.
2343    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2344    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2345
# Second set of instantiations for the multiply family. These reuse the
# snippets bound earlier in the file (vmlaCode, vmlafpCode, vmlalCode, ...)
# with the two-register generators; the class names carry an extra "s"
# (VmlasD, Vmulls, ...), presumably the by-scalar encodings -- confirm
# against the decoder.
#
# vmla: integer, floating-point and long forms.
# NOTE(review): the integer vmla forms use unsignedTypes while the vmls and
# vmul forms below use allTypes -- confirm this difference is intended.
2346    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2347    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2348    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2349    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2350    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2351
# vmls: integer, floating-point and long forms.
2352    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2353    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2354    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2355    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2356    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2357
# vmul: no trailing True, i.e. registered without the extra flag that the
# accumulating forms above pass.
2358    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2359    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2360    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2361    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2362    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2363
# vqdmull / vqdmlal / vqdmlsl / vqdmulh / vqrdmulh forms; only the two
# accumulating ones (vqdmlal, vqdmlsl) pass the trailing True.
2364    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2365    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2366    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2367    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2368    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2369    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2370            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2371    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2372            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2373
# vshr (shift right by immediate): when imm is at least the element width
# in bits the result is -1 if ltz(srcElem1) holds and 0 otherwise; for
# smaller imm it is a plain srcElem1 >> imm.
2374    vshrCode = '''
2375        if (imm >= sizeof(srcElem1) * 8) {
2376            if (ltz(srcElem1))
2377                destElem = -1;
2378            else
2379                destElem = 0;
2380        } else {
2381            destElem = srcElem1 >> imm;
2382        }
2383    '''
# Generated for every type in allTypes, sizes 2 and 4.
2384    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2385    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2386
2387    vsraCode = '''
2388        Element mid;;
2389        if (imm >= sizeof(srcElem1) * 8) {
2390            mid = ltz(srcElem1) ? -1 : 0;
2391        } else {
2392            mid = srcElem1 >> imm;
2393            if (ltz(srcElem1) && !ltz(mid)) {
2394                mid |= -(mid & ((Element)1 <<
2395                            (sizeof(Element) * 8 - 1 - imm)));
2396            }
2397        }
2398        destElem += mid;
2399    '''
2400    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2401    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2402
# vrshr (rounding shift right by immediate):
#   * imm greater than the element width: result is 0.
#   * imm non-zero: rBit is bit (imm - 1) of the source and is added to the
#     shifted value. The shift is done as (imm - 1) followed by 1, so the
#     amount of any single shift stays below the element width.
#   * imm == 0: the source is copied unchanged.
2403    vrshrCode = '''
2404        if (imm > sizeof(srcElem1) * 8) {
2405            destElem = 0;
2406        } else if (imm) {
2407            Element rBit = bits(srcElem1, imm - 1);
2408            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2409        } else {
2410            destElem = srcElem1;
2411        }
2412    '''
# Generated for every type in allTypes, sizes 2 and 4.
2413    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2414    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2415
# vrsra (rounding shift right and accumulate): same three cases as the
# rounding shift, but every branch adds into destElem instead of
# overwriting it. The "destElem += 0" branch is an explicit no-op for
# imm greater than the element width.
2416    vrsraCode = '''
2417        if (imm > sizeof(srcElem1) * 8) {
2418            destElem += 0;
2419        } else if (imm) {
2420            Element rBit = bits(srcElem1, imm - 1);
2421            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2422        } else {
2423            destElem += srcElem1;
2424        }
2425    '''
# Trailing True: the snippet reads destElem (presumed meaning of the flag).
2426    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2427    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2428
# vsri (shift right and insert): for imm below the element width the
# result is (srcElem1 >> imm) OR-ed with the bits of destElem selected by
# ~mask(width - imm), i.e. the upper imm bits of the old destination are
# kept. For larger imm the destination is left as it was (the
# self-assignment is a deliberate no-op).
2429    vsriCode = '''
2430        if (imm >= sizeof(Element) * 8)
2431            destElem = destElem;
2432        else
2433            destElem = (srcElem1 >> imm) |
2434                (destElem & ~mask(sizeof(Element) * 8 - imm));
2435    '''
# Unsigned element types only; trailing True because destElem is read.
2436    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2437    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2438
# vshl (shift left by immediate): srcElem1 << imm for imm below the element
# width. For larger imm the shift is split into (width - 1) followed by 1,
# presumably so that no single C++ shift uses an amount equal to or above
# the operand width.
2439    vshlCode = '''
2440        if (imm >= sizeof(Element) * 8)
2441            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2442        else
2443            destElem = srcElem1 << imm;
2444    '''
# Unsigned element types only, sizes 2 and 4.
2445    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2446    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2447
# vsli (shift left and insert): for imm below the element width the result
# is (srcElem1 << imm) OR-ed with the low imm bits of the old destElem
# (destElem & mask(imm)). For larger imm the destination is left as it was
# (the self-assignment is a deliberate no-op).
2448    vsliCode = '''
2449        if (imm >= sizeof(Element) * 8)
2450            destElem = destElem;
2451        else
2452            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2453    '''
# Unsigned element types only; trailing True because destElem is read.
2454    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2455    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2456
# vqshl (saturating shift left by immediate, signed element types).
# Saturation value: 1 << (width - 1), bitwise-complemented when the source
# is positive; fpscr.qc is set whenever saturation happens.
#   * imm >= width: any non-zero source saturates, zero stays zero.
#   * 0 < imm < width: topBits holds source bits [width-1 : width-1-imm];
#     unless they are all zeros or all ones (mask(imm + 1)) the shifted
#     value is replaced by the saturation value.
#   * imm == 0: the source is copied unchanged.
# The QC flag is round-tripped through the FpscrQc operand.
2457    vqshlCode = '''
2458        FPSCR fpscr = (FPSCR) FpscrQc;
2459        if (imm >= sizeof(Element) * 8) {
2460            if (srcElem1 != 0) {
2461                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2462                if (srcElem1 > 0)
2463                    destElem = ~destElem;
2464                fpscr.qc = 1;
2465            } else {
2466                destElem = 0;
2467            }
2468        } else if (imm) {
2469            destElem = (srcElem1 << imm);
2470            uint64_t topBits = bits((uint64_t)srcElem1,
2471                                    sizeof(Element) * 8 - 1,
2472                                    sizeof(Element) * 8 - 1 - imm);
2473            if (topBits != 0 && topBits != mask(imm + 1)) {
2474                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2475                if (srcElem1 > 0)
2476                    destElem = ~destElem;
2477                fpscr.qc = 1;
2478            }
2479        } else {
2480            destElem = srcElem1;
2481        }
2482        FpscrQc = fpscr;
2483    '''
# Signed element types, sizes 2 and 4.
2484    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2485    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2486
# Saturating shift left by immediate, instantiated for unsigned element
# types. Saturation value is mask(width) (all ones); fpscr.qc is set
# whenever saturation happens.
#   * imm >= width: any non-zero source saturates.
#   * 0 < imm < width: saturates when any of source bits
#     [width-1 : width-imm] is set, i.e. when a set bit would be shifted out.
#   * imm == 0: the source is copied unchanged.
2487    vqshluCode = '''
2488        FPSCR fpscr = (FPSCR) FpscrQc;
2489        if (imm >= sizeof(Element) * 8) {
2490            if (srcElem1 != 0) {
2491                destElem = mask(sizeof(Element) * 8);
2492                fpscr.qc = 1;
2493            } else {
2494                destElem = 0;
2495            }
2496        } else if (imm) {
2497            destElem = (srcElem1 << imm);
2498            uint64_t topBits = bits((uint64_t)srcElem1,
2499                                    sizeof(Element) * 8 - 1,
2500                                    sizeof(Element) * 8 - imm);
2501            if (topBits != 0) {
2502                destElem = mask(sizeof(Element) * 8);
2503                fpscr.qc = 1;
2504            }
2505        } else {
2506            destElem = srcElem1;
2507        }
2508        FpscrQc = fpscr;
2509    '''
# Unsigned element types, sizes 2 and 4.
2510    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2511    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2512
# Saturating shift left by immediate, instantiated for signed element
# types but saturating to an unsigned range: a negative source always
# produces 0 with fpscr.qc set (in all three imm cases), while a positive
# source that overflows produces mask(width) with fpscr.qc set.
#   * imm >= width: any positive source overflows.
#   * 0 < imm < width: overflow when any of bits [width-1 : width-imm] is
#     set.
#   * imm == 0: non-negative sources are copied unchanged.
2513    vqshlusCode = '''
2514        FPSCR fpscr = (FPSCR) FpscrQc;
2515        if (imm >= sizeof(Element) * 8) {
2516            if (srcElem1 < 0) {
2517                destElem = 0;
2518                fpscr.qc = 1;
2519            } else if (srcElem1 > 0) {
2520                destElem = mask(sizeof(Element) * 8);
2521                fpscr.qc = 1;
2522            } else {
2523                destElem = 0;
2524            }
2525        } else if (imm) {
2526            destElem = (srcElem1 << imm);
2527            uint64_t topBits = bits((uint64_t)srcElem1,
2528                                    sizeof(Element) * 8 - 1,
2529                                    sizeof(Element) * 8 - imm);
2530            if (srcElem1 < 0) {
2531                destElem = 0;
2532                fpscr.qc = 1;
2533            } else if (topBits != 0) {
2534                destElem = mask(sizeof(Element) * 8);
2535                fpscr.qc = 1;
2536            }
2537        } else {
2538            if (srcElem1 < 0) {
2539                fpscr.qc = 1;
2540                destElem = 0;
2541            } else {
2542                destElem = srcElem1;
2543            }
2544        }
2545        FpscrQc = fpscr;
2546    '''
# Signed element types, sizes 2 and 4.
2547    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2548    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2549
# vshrn (shift right and narrow): 0 when imm is at least the bit width of
# srcElem1, otherwise srcElem1 >> imm assigned to destElem.
# NOTE(review): the narrowing itself presumably happens through the
# assignment to the narrower destElem type set up by
# twoRegNarrowShiftInst -- confirm in that helper.
2550    vshrnCode = '''
2551        if (imm >= sizeof(srcElem1) * 8) {
2552            destElem = 0;
2553        } else {
2554            destElem = srcElem1 >> imm;
2555        }
2556    '''
2557    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2558
# vrshrn (rounding shift right and narrow): 0 when imm exceeds the bit
# width of srcElem1; for non-zero imm the value is shifted by (imm - 1)
# and then by 1, and rBit (bit imm - 1 of the source) is added; for
# imm == 0 the source is assigned unchanged.
2559    vrshrnCode = '''
2560        if (imm > sizeof(srcElem1) * 8) {
2561            destElem = 0;
2562        } else if (imm) {
2563            Element rBit = bits(srcElem1, imm - 1);
2564            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2565        } else {
2566            destElem = srcElem1;
2567        }
2568    '''
2569    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2570
# vqshrn (saturating shift right and narrow, signed types).
#   * imm > width of srcElem1: result 0; fpscr.qc is set unless the source
#     is 0 or -1.
#   * imm non-zero: "mid" is the shifted value with its upper bits filled
#     from the shifted-down sign position. If mid does not survive a cast
#     to Element, the result saturates to mask(width - 1), complemented for
#     negative sources, and fpscr.qc is set.
#   * imm == 0: the source is assigned unchanged.
2571    vqshrnCode = '''
2572        FPSCR fpscr = (FPSCR) FpscrQc;
2573        if (imm > sizeof(srcElem1) * 8) {
2574            if (srcElem1 != 0 && srcElem1 != -1)
2575                fpscr.qc = 1;
2576            destElem = 0;
2577        } else if (imm) {
2578            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2579            mid |= -(mid & ((BigElement)1 <<
2580                        (sizeof(BigElement) * 8 - 1 - imm)));
2581            if (mid != (Element)mid) {
2582                destElem = mask(sizeof(Element) * 8 - 1);
2583                if (srcElem1 < 0)
2584                    destElem = ~destElem;
2585                fpscr.qc = 1;
2586            } else {
2587                destElem = mid;
2588            }
2589        } else {
2590            destElem = srcElem1;
2591        }
2592        FpscrQc = fpscr;
2593    '''
2594    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2595
# Saturating shift right and narrow, instantiated for unsigned types.
#   * imm > width of srcElem1: result 0; fpscr.qc set for a non-zero source.
#   * imm non-zero: if the shifted value does not survive a cast to
#     Element, the result saturates to mask(width) (all ones) and fpscr.qc
#     is set.
#   * imm == 0: the source is assigned unchanged, with no saturation check.
2596    vqshrunCode = '''
2597        FPSCR fpscr = (FPSCR) FpscrQc;
2598        if (imm > sizeof(srcElem1) * 8) {
2599            if (srcElem1 != 0)
2600                fpscr.qc = 1;
2601            destElem = 0;
2602        } else if (imm) {
2603            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2604            if (mid != (Element)mid) {
2605                destElem = mask(sizeof(Element) * 8);
2606                fpscr.qc = 1;
2607            } else {
2608                destElem = mid;
2609            }
2610        } else {
2611            destElem = srcElem1;
2612        }
2613        FpscrQc = fpscr;
2614    '''
# Registered under mnemonic "vqshrun" with class NVqshrun for unsigned
# types; a second class for signed types shares the same mnemonic.
2615    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2616                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2617
# Saturating shift right and narrow, signed source types with an unsigned
# saturation range.
#   * imm > width of srcElem1: result 0; fpscr.qc set for a non-zero source.
#   * imm non-zero: if any bit of the shifted value at or above the Element
#     width is set, the result saturates to 0 for a negative source or
#     mask(width) otherwise, and fpscr.qc is set.
#   * imm == 0: the source is assigned unchanged, with no saturation check.
2618    vqshrunsCode = '''
2619        FPSCR fpscr = (FPSCR) FpscrQc;
2620        if (imm > sizeof(srcElem1) * 8) {
2621            if (srcElem1 != 0)
2622                fpscr.qc = 1;
2623            destElem = 0;
2624        } else if (imm) {
2625            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2626            if (bits(mid, sizeof(BigElement) * 8 - 1,
2627                          sizeof(Element) * 8) != 0) {
2628                if (srcElem1 < 0) {
2629                    destElem = 0;
2630                } else {
2631                    destElem = mask(sizeof(Element) * 8);
2632                }
2633                fpscr.qc = 1;
2634            } else {
2635                destElem = mid;
2636            }
2637        } else {
2638            destElem = srcElem1;
2639        }
2640        FpscrQc = fpscr;
2641    '''
2642    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2643                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2644
# vqrshrn (saturating rounding shift right and narrow, signed types).
#   * imm > width of srcElem1: result 0; fpscr.qc is set unless the source
#     is 0 or -1.
#   * imm non-zero: shift by (imm - 1), capture the low bit as rBit, shift
#     once more, fill the upper bits from the shifted-down sign position,
#     then add rBit. If the sum does not survive a cast to Element, the
#     result saturates to mask(width - 1), complemented for negative
#     sources, and fpscr.qc is set.
#   * imm == 0: the same saturation check is applied to the unshifted
#     source.
2645    vqrshrnCode = '''
2646        FPSCR fpscr = (FPSCR) FpscrQc;
2647        if (imm > sizeof(srcElem1) * 8) {
2648            if (srcElem1 != 0 && srcElem1 != -1)
2649                fpscr.qc = 1;
2650            destElem = 0;
2651        } else if (imm) {
2652            BigElement mid = (srcElem1 >> (imm - 1));
2653            uint64_t rBit = mid & 0x1;
2654            mid >>= 1;
2655            mid |= -(mid & ((BigElement)1 <<
2656                        (sizeof(BigElement) * 8 - 1 - imm)));
2657            mid += rBit;
2658            if (mid != (Element)mid) {
2659                destElem = mask(sizeof(Element) * 8 - 1);
2660                if (srcElem1 < 0)
2661                    destElem = ~destElem;
2662                fpscr.qc = 1;
2663            } else {
2664                destElem = mid;
2665            }
2666        } else {
2667            if (srcElem1 != (Element)srcElem1) {
2668                destElem = mask(sizeof(Element) * 8 - 1);
2669                if (srcElem1 < 0)
2670                    destElem = ~destElem;
2671                fpscr.qc = 1;
2672            } else {
2673                destElem = srcElem1;
2674            }
2675        }
2676        FpscrQc = fpscr;
2677    '''
2678    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2679                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2680
2681    vqrshrunCode = '''
2682        FPSCR fpscr = (FPSCR) FpscrQc;
2683        if (imm > sizeof(srcElem1) * 8) {
2684            if (srcElem1 != 0)
2685                fpscr.qc = 1;
2686            destElem = 0;
2687        } else if (imm) {
2688            BigElement mid = (srcElem1 >> (imm - 1));
2689            uint64_t rBit = mid & 0x1;
2690            mid >>= 1;
2691            mid += rBit;
2692            if (mid != (Element)mid) {
2693                destElem = mask(sizeof(Element) * 8);
2694                fpscr.qc = 1;
2695            } else {
2696                destElem = mid;
2697            }
2698        } else {
2699            if (srcElem1 != (Element)srcElem1) {
2700                destElem = mask(sizeof(Element) * 8 - 1);
2701                fpscr.qc = 1;
2702            } else {
2703                destElem = srcElem1;
2704            }
2705        }
2706        FpscrQc = fpscr;
2707    '''
2708    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2709                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2710
# Saturating rounding shift right and narrow, signed source types with an
# unsigned saturation range.
#   * imm > width of srcElem1: result 0; fpscr.qc set for a non-zero source.
#   * imm non-zero: shift by (imm - 1), capture rBit, shift once more, fill
#     the upper bits from the shifted-down sign position, add rBit. If any
#     bit at or above the Element width is set, the result saturates to 0
#     for a negative source or mask(width) otherwise, and fpscr.qc is set.
#   * imm == 0: a negative source gives 0 with fpscr.qc set; otherwise the
#     source is assigned unchanged.
2711    vqrshrunsCode = '''
2712        FPSCR fpscr = (FPSCR) FpscrQc;
2713        if (imm > sizeof(srcElem1) * 8) {
2714            if (srcElem1 != 0)
2715                fpscr.qc = 1;
2716            destElem = 0;
2717        } else if (imm) {
2718            BigElement mid = (srcElem1 >> (imm - 1));
2719            uint64_t rBit = mid & 0x1;
2720            mid >>= 1;
2721            mid |= -(mid & ((BigElement)1 <<
2722                            (sizeof(BigElement) * 8 - 1 - imm)));
2723            mid += rBit;
2724            if (bits(mid, sizeof(BigElement) * 8 - 1,
2725                          sizeof(Element) * 8) != 0) {
2726                if (srcElem1 < 0) {
2727                    destElem = 0;
2728                } else {
2729                    destElem = mask(sizeof(Element) * 8);
2730                }
2731                fpscr.qc = 1;
2732            } else {
2733                destElem = mid;
2734            }
2735        } else {
2736            if (srcElem1 < 0) {
2737                fpscr.qc = 1;
2738                destElem = 0;
2739            } else {
2740                destElem = srcElem1;
2741            }
2742        }
2743        FpscrQc = fpscr;
2744    '''
2745    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2746                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2747
# vshll (shift left long): the source element is cast to BigElement before
# shifting, so the shift is carried out at the wider width. An imm at or
# above the bit width of destElem yields 0.
2748    vshllCode = '''
2749        if (imm >= sizeof(destElem) * 8) {
2750            destElem = 0;
2751        } else {
2752            destElem = (BigElement)srcElem1 << imm;
2753        }
2754    '''
2755    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2756
# vmovl: plain element copy, generated through the same long-shift helper
# as vshll (presumably so the copy widens each element -- confirm in
# twoRegLongShiftInst). Registered under the "SimdMiscOp" op class.
2757    vmovlCode = '''
2758        destElem = srcElem1;
2759    '''
2760    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2761
# vcvt, float to fixed point, first variant: calls
# vfpFpSToFixed(srcElem1, false, false, imm). fpscr.idc is set when
# flushToZero(srcElem1) returns true. The conversion is bracketed by
# prepFpState / finishVfp.
# NOTE(review): the first "false" presumably selects an unsigned result
# (the sibling variant passes true) -- confirm against vfpFpSToFixed.
# NOTE(review): the empty asm statements with "m" constraints presumably
# keep the compiler from moving the conversion outside the
# prepFpState/finishVfp window -- confirm.
2762    vcvt2ufxCode = '''
2763        FPSCR fpscr = (FPSCR) FpscrExc;
2764        if (flushToZero(srcElem1))
2765            fpscr.idc = 1;
2766        VfpSavedState state = prepFpState(VfpRoundNearest);
2767        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2768        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2769        __asm__ __volatile__("" :: "m" (destReg));
2770        finishVfp(fpscr, state, true);
2771        FpscrExc = fpscr;
2772    '''
# Float source, integer-valued destination (toInt = True), sizes 2 and 4.
2773    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2774            2, vcvt2ufxCode, toInt = True)
2775    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2776            4, vcvt2ufxCode, toInt = True)
2777
# vcvt, float to fixed point, second variant: calls
# vfpFpSToFixed(srcElem1, true, false, imm). fpscr.idc is set when
# flushToZero(srcElem1) returns true. The conversion is bracketed by
# prepFpState / finishVfp.
# NOTE(review): the leading "true" presumably selects a signed result --
# confirm against vfpFpSToFixed.
2778    vcvt2sfxCode = '''
2779        FPSCR fpscr = (FPSCR) FpscrExc;
2780        if (flushToZero(srcElem1))
2781            fpscr.idc = 1;
2782        VfpSavedState state = prepFpState(VfpRoundNearest);
2783        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2784        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2785        __asm__ __volatile__("" :: "m" (destReg));
2786        finishVfp(fpscr, state, true);
2787        FpscrExc = fpscr;
2788    '''
# Float source, integer-valued destination (toInt = True), sizes 2 and 4.
2789    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2790            2, vcvt2sfxCode, toInt = True)
2791    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2792            4, vcvt2sfxCode, toInt = True)
2793
# vcvt, fixed point to float via vfpUFixedToFpS(true, true, srcReg1, false,
# imm), bracketed by prepFpState / finishVfp. Unlike the float-to-fixed
# snippets there is no flushToZero check on the input.
# NOTE(review): the meaning of the boolean arguments is defined by
# vfpUFixedToFpS, outside this section -- confirm there.
2794    vcvtu2fpCode = '''
2795        FPSCR fpscr = (FPSCR) FpscrExc;
2796        VfpSavedState state = prepFpState(VfpRoundNearest);
2797        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2798        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2799        __asm__ __volatile__("" :: "m" (destElem));
2800        finishVfp(fpscr, state, true);
2801        FpscrExc = fpscr;
2802    '''
# Integer-valued source (fromInt = True), sizes 2 and 4.
2803    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2804            2, vcvtu2fpCode, fromInt = True)
2805    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2806            4, vcvtu2fpCode, fromInt = True)
2807
# vcvt, fixed point to float via vfpSFixedToFpS(true, true, srcReg1, false,
# imm), bracketed by prepFpState / finishVfp.
# NOTE(review): the meaning of the boolean arguments is defined by
# vfpSFixedToFpS, outside this section -- confirm there.
2808    vcvts2fpCode = '''
2809        FPSCR fpscr = (FPSCR) FpscrExc;
2810        VfpSavedState state = prepFpState(VfpRoundNearest);
2811        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2812        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2813        __asm__ __volatile__("" :: "m" (destElem));
2814        finishVfp(fpscr, state, true);
2815        FpscrExc = fpscr;
2816    '''
# Integer-valued source (fromInt = True), sizes 2 and 4.
2817    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2818            2, vcvts2fpCode, fromInt = True)
2819    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2820            4, vcvts2fpCode, fromInt = True)
2821
# vcvt, single to half precision: the source element is reinterpreted as a
# float with bitsToFp, fpscr.idc is set when flushToZero(srcFp1) returns
# true, and the conversion is done by vcvtFpSFpH with fpscr.ahp passed
# through. destElem is zeroed first and named in the asm constraints
# alongside srcFp1.
2822    vcvts2hCode = '''
2823        destElem = 0;
2824        FPSCR fpscr = (FPSCR) FpscrExc;
2825        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2826        if (flushToZero(srcFp1))
2827            fpscr.idc = 1;
2828        VfpSavedState state = prepFpState(VfpRoundNearest);
2829        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2830                                : "m" (srcFp1), "m" (destElem));
2831        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2832                              fpscr.ahp, srcFp1);
2833        __asm__ __volatile__("" :: "m" (destElem));
2834        finishVfp(fpscr, state, true);
2835        FpscrExc = fpscr;
2836    '''
# Narrowing helper, instantiated for uint16_t only.
2837    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2838
# vcvt, half to single precision: vcvtFpHFpS converts srcElem1 (with
# fpscr.ahp passed through) and fpToBits stores the resulting float's bit
# pattern in destElem. Bracketed by prepFpState / finishVfp.
2839    vcvth2sCode = '''
2840        destElem = 0;
2841        FPSCR fpscr = (FPSCR) FpscrExc;
2842        VfpSavedState state = prepFpState(VfpRoundNearest);
2843        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2844                                : "m" (srcElem1), "m" (destElem));
2845        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2846        __asm__ __volatile__("" :: "m" (destElem));
2847        finishVfp(fpscr, state, true);
2848        FpscrExc = fpscr;
2849    '''
# Long (widening) helper, instantiated for uint16_t only.
2850    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2851
# vrsqrte (integer form): delegates entirely to unsignedRSqrtEstimate.
# Instantiated for uint32_t only, sizes 2 and 4.
2852    vrsqrteCode = '''
2853        destElem = unsignedRSqrtEstimate(srcElem1);
2854    '''
2855    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2856    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2857
# vrsqrte (floating-point form): sets fpscr.idc when flushToZero(srcReg1)
# returns true, then delegates to fprSqrtEstimate, which also receives
# fpscr.
2858    vrsqrtefpCode = '''
2859        FPSCR fpscr = (FPSCR) FpscrExc;
2860        if (flushToZero(srcReg1))
2861            fpscr.idc = 1;
2862        destReg = fprSqrtEstimate(fpscr, srcReg1);
2863        FpscrExc = fpscr;
2864    '''
2865    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2866    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2867
# vrecpe (integer form): delegates entirely to unsignedRecipEstimate.
# Instantiated for uint32_t only, under the "SimdMultAccOp" op class.
2868    vrecpeCode = '''
2869        destElem = unsignedRecipEstimate(srcElem1);
2870    '''
2871    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2872    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2873
# vrecpe (floating-point form): sets fpscr.idc when flushToZero(srcReg1)
# returns true, then delegates to fpRecipEstimate, which also receives
# fpscr.
2874    vrecpefpCode = '''
2875        FPSCR fpscr = (FPSCR) FpscrExc;
2876        if (flushToZero(srcReg1))
2877            fpscr.idc = 1;
2878        destReg = fpRecipEstimate(fpscr, srcReg1);
2879        FpscrExc = fpscr;
2880    '''
2881    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2882    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2883
# vrev16 / vrev32 / vrev64: the three snippets differ only in the group
# size in bytes (1 << 1, 1 << 2, 1 << 3). groupSize is the number of
# elements per group, and XOR-ing the index i with (groupSize - 1) gives
# j, the mirrored position inside the same group.
# NOTE(review): i and j are not declared in the snippets; presumably i is
# the source element index and j the destination index supplied by the
# twoRegMiscInst template -- confirm there.
#
# vrev16: 2-byte groups, uint8_t elements only.
2884    vrev16Code = '''
2885        destElem = srcElem1;
2886        unsigned groupSize = ((1 << 1) / sizeof(Element));
2887        unsigned reverseMask = (groupSize - 1);
2888        j = i ^ reverseMask;
2889    '''
2890    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2891    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# vrev32: 4-byte groups, uint8_t and uint16_t elements.
2892    vrev32Code = '''
2893        destElem = srcElem1;
2894        unsigned groupSize = ((1 << 2) / sizeof(Element));
2895        unsigned reverseMask = (groupSize - 1);
2896        j = i ^ reverseMask;
2897    '''
2898    twoRegMiscInst("vrev32", "NVrev32D",
2899            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2900    twoRegMiscInst("vrev32", "NVrev32Q",
2901            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# vrev64: 8-byte groups, every type in smallUnsignedTypes.
2902    vrev64Code = '''
2903        destElem = srcElem1;
2904        unsigned groupSize = ((1 << 3) / sizeof(Element));
2905        unsigned reverseMask = (groupSize - 1);
2906        j = i ^ reverseMask;
2907    '''
2908    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2909    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2910
# vpaddl: sum of two source elements, each cast to BigElement first so the
# addition is carried out at the wider width.
# NOTE(review): srcElem1/srcElem2 are presumably adjacent elements of the
# single source register, paired up by twoRegCondenseInst -- confirm there.
2911    vpaddlCode = '''
2912        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2913    '''
2914    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2915    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2916
# vpadal: same widened pairwise sum as vpaddl, but added into destElem
# instead of overwriting it; hence the trailing True (presumed
# "destination is read" flag) on the instantiations.
2917    vpadalCode = '''
2918        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2919    '''
2920    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2921    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2922
# vcls (count leading sign bits): the source is shifted left once to drop
# the sign bit, then shifted further while the top bit still matches the
# original sign, counting the iterations. The count is capped at
# (element width - 1). The two branches are the same loop for negative and
# non-negative sources.
2923    vclsCode = '''
2924        unsigned count = 0;
2925        if (srcElem1 < 0) {
2926            srcElem1 <<= 1;
2927            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2928                count++;
2929                srcElem1 <<= 1;
2930            }
2931        } else {
2932            srcElem1 <<= 1;
2933            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2934                count++;
2935                srcElem1 <<= 1;
2936            }
2937        }
2938        destElem = count;
2939    '''
2940    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2941    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2942
# vclz (count leading zeros): shifts the source left while it is
# non-negative (top bit clear), counting the shifts, up to the element
# width. The test relies on signed element types, which is what the
# instantiations below use.
2943    vclzCode = '''
2944        unsigned count = 0;
2945        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2946            count++;
2947            srcElem1 <<= 1;
2948        }
2949        destElem = count;
2950    '''
2951    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2952    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2953
# vcnt (count set bits): adds the low bit of the source to the count and
# shifts right until the source is zero or the element width has been
# reached.
2954    vcntCode = '''
2955        unsigned count = 0;
2956        while (srcElem1 && count < sizeof(Element) * 8) {
2957            count += srcElem1 & 0x1;
2958            srcElem1 >>= 1;
2959        }
2960        destElem = count;
2961    '''
2962
# Unsigned element types, sizes 2 and 4.
2963    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2964    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2965
# vmvn: bitwise complement of the source. Instantiated for uint64_t only;
# the operation is bitwise, so the element width does not affect the
# result.
2966    vmvnCode = '''
2967        destElem = ~srcElem1;
2968    '''
2969    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2970    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2971
# vqabs (saturating absolute value): a source equal to 1 << (width - 1)
# (only the top bit set) has no positive counterpart, so the result is its
# bitwise complement and fpscr.qc is set. Other negative sources are
# negated; non-negative sources are copied.
2972    vqabsCode = '''
2973        FPSCR fpscr = (FPSCR) FpscrQc;
2974        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2975            fpscr.qc = 1;
2976            destElem = ~srcElem1;
2977        } else if (srcElem1 < 0) {
2978            destElem = -srcElem1;
2979        } else {
2980            destElem = srcElem1;
2981        }
2982        FpscrQc = fpscr;
2983    '''
2984    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2985    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2986
# vqneg (saturating negate): a source equal to 1 << (width - 1) (only the
# top bit set) produces its bitwise complement with fpscr.qc set; every
# other source is simply negated.
2987    vqnegCode = '''
2988        FPSCR fpscr = (FPSCR) FpscrQc;
2989        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2990            fpscr.qc = 1;
2991            destElem = ~srcElem1;
2992        } else {
2993            destElem = -srcElem1;
2994        }
2995        FpscrQc = fpscr;
2996    '''
2997    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
2998    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
2999
# vabs (integer): negates negative sources and copies the rest. There is
# no saturation and no FPSCR access, unlike the vqabs snippet.
3000    vabsCode = '''
3001        if (srcElem1 < 0) {
3002            destElem = -srcElem1;
3003        } else {
3004            destElem = srcElem1;
3005        }
3006    '''
3007
3008    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3009    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# vabs (floating point): writes the source float into a uint32_t/float
# union, ANDs the integer view with mask(width - 1) so that the top bit is
# cleared, and reads the float back. The snippet does not touch FPSCR.
3010    vabsfpCode = '''
3011        union
3012        {
3013            uint32_t i;
3014            float f;
3015        } cStruct;
3016        cStruct.f = srcReg1;
3017        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3018        destReg = cStruct.f;
3019    '''
3020    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3021    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3022
# vneg (integer): unary minus on each signed element, with no saturation.
3023    vnegCode = '''
3024        destElem = -srcElem1;
3025    '''
3026    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3027    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# vneg (floating point): unary minus on the float register value; the
# snippet does not touch FPSCR.
3028    vnegfpCode = '''
3029        destReg = -srcReg1;
3030    '''
3031    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3032    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3033
# vcgt against zero (integer): all-ones element (mask(width)) when the
# source is greater than 0, otherwise 0.
3034    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3035    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3036    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
# vcgt against zero (floating point): vcgtFunc is run through binaryOp
# with (FloatReg)0.0 as the second operand. res == 0 yields -1 in destReg,
# otherwise 0; res == 2.0 sets fpscr.ioc.
# This rebinds the Python name vcgtfpCode, which an earlier statement in
# this file bound to the register-vs-register snippet; that earlier
# binding has already been consumed by its instantiations.
3037    vcgtfpCode = '''
3038        FPSCR fpscr = (FPSCR) FpscrExc;
3039        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3040                             true, true, VfpRoundNearest);
3041        destReg = (res == 0) ? -1 : 0;
3042        if (res == 2.0)
3043            fpscr.ioc = 1;
3044        FpscrExc = fpscr;
3045    '''
3046    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3047            2, vcgtfpCode, toInt = True)
3048    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3049            4, vcgtfpCode, toInt = True)
3050
3051    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3052    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3053    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3054    vcgefpCode = '''
3055        FPSCR fpscr = (FPSCR) FpscrExc;
3056        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3057                             true, true, VfpRoundNearest);
3058        destReg = (res == 0) ? -1 : 0;
3059        if (res == 2.0)
3060            fpscr.ioc = 1;
3061        FpscrExc = fpscr;
3062    '''
3063    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3064            2, vcgefpCode, toInt = True)
3065    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3066            4, vcgefpCode, toInt = True)
3067
3068    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3069    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3070    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3071    vceqfpCode = '''
3072        FPSCR fpscr = (FPSCR) FpscrExc;
3073        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3074                             true, true, VfpRoundNearest);
3075        destReg = (res == 0) ? -1 : 0;
3076        if (res == 2.0)
3077            fpscr.ioc = 1;
3078        FpscrExc = fpscr;
3079    '''
3080    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3081            2, vceqfpCode, toInt = True)
3082    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3083            4, vceqfpCode, toInt = True)
3084
3085    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3086    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3087    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3088    vclefpCode = '''
3089        FPSCR fpscr = (FPSCR) FpscrExc;
3090        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3091                             true, true, VfpRoundNearest);
3092        destReg = (res == 0) ? -1 : 0;
3093        if (res == 2.0)
3094            fpscr.ioc = 1;
3095        FpscrExc = fpscr;
3096    '''
3097    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3098            2, vclefpCode, toInt = True)
3099    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3100            4, vclefpCode, toInt = True)
3101
3102    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3103    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3104    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3105    vcltfpCode = '''
3106        FPSCR fpscr = (FPSCR) FpscrExc;
3107        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3108                             true, true, VfpRoundNearest);
3109        destReg = (res == 0) ? -1 : 0;
3110        if (res == 2.0)
3111            fpscr.ioc = 1;
3112        FpscrExc = fpscr;
3113    '''
3114    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3115            2, vcltfpCode, toInt = True)
3116    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3117            4, vcltfpCode, toInt = True)
3118
    # vswp: exchanges srcReg1.regs[r] and destReg.regs[r] for every r below
    # rCount, so both operands are modified.
3119    vswpCode = '''
3120        FloatRegBits mid;
3121        for (unsigned r = 0; r < rCount; r++) {
3122            mid = srcReg1.regs[r];
3123            srcReg1.regs[r] = destReg.regs[r];
3124            destReg.regs[r] = mid;
3125        }
3126    '''
3127    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3128    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3129
    # vtrn: walking the elements in steps of two, exchanges
    # srcReg1.elements[i] (even index) with destReg.elements[i + 1] (odd
    # index).  The other elements of both vectors are left as they were.
3130    vtrnCode = '''
3131        Element mid;
3132        for (unsigned i = 0; i < eCount; i += 2) {
3133            mid = srcReg1.elements[i];
3134            srcReg1.elements[i] = destReg.elements[i + 1];
3135            destReg.elements[i + 1] = mid;
3136        }
3137    '''
3138    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3139            smallUnsignedTypes, 2, vtrnCode)
3140    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3141            smallUnsignedTypes, 4, vtrnCode)
3142
    # vuzp: de-interleave in place.  mid holds a copy of the original srcReg1.
    # Afterwards destReg is the even-indexed elements of the original destReg
    # followed by the even-indexed elements of the original srcReg1, and
    # srcReg1 is the odd-indexed elements of each, in the same order.
    # The upper half of destReg is filled in a second loop because the first
    # loop is still reading destReg.elements[2 * i] and [2 * i + 1] from it.
3143    vuzpCode = '''
3144        Element mid[eCount];
3145        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3146        for (unsigned i = 0; i < eCount / 2; i++) {
3147            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3148            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3149            destReg.elements[i] = destReg.elements[2 * i];
3150        }
3151        for (unsigned i = 0; i < eCount / 2; i++) {
3152            destReg.elements[eCount / 2 + i] = mid[2 * i];
3153        }
3154    '''
3155    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3156    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3157
3158    vzipCode = '''
3159        Element mid[eCount];
3160        memcpy(&mid, &destReg, sizeof(destReg));
3161        for (unsigned i = 0; i < eCount / 2; i++) {
3162            destReg.elements[2 * i] = mid[i];
3163            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3164        }
3165        for (int i = 0; i < eCount / 2; i++) {
3166            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3167            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3168        }
3169    '''
3170    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3171    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3172
    # vmovn and vdup share the same per-element snippet, a plain copy of
    # srcElem1 into destElem.
    # NOTE(review): the narrowing (vmovn) and scalar-replication (vdup)
    # behaviour presumably comes from the twoRegNarrowMiscInst /
    # twoRegMiscScInst generators, which are defined outside this section.
3173    vmovnCode = 'destElem = srcElem1;'
3174    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3175
3176    vdupCode = 'destElem = srcElem1;'
3177    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3178    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3179
3180    def vdupGprInst(name, Name, opClass, types, rCount):
3181        global header_output, exec_output
3182        eWalkCode = '''
3183        RegVect destReg;
3184        for (unsigned i = 0; i < eCount; i++) {
3185            destReg.elements[i] = htog((Element)Op1);
3186        }
3187        '''
3188        for reg in range(rCount):
3189            eWalkCode += '''
3190            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3191            ''' % { "reg" : reg }
3192        iop = InstObjParams(name, Name,
3193                            "RegRegOp",
3194                            { "code": eWalkCode,
3195                              "r_count": rCount,
3196                              "predicate_test": predicateTest,
3197                              "op_class": opClass }, [])
3198        header_output += NeonRegRegOpDeclare.subst(iop)
3199        exec_output += NeonEqualRegExecute.subst(iop)
3200        for type in types:
3201            substDict = { "targs" : type,
3202                          "class_name" : Name }
3203            exec_output += NeonExecDeclare.subst(substDict)
3204    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3205    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3206
    # One-register immediate forms.  Each snippet combines destElem with imm:
    #   vmov: destElem = imm          vmvn: destElem = ~imm
    #   vorr: destElem |= imm         vbic: destElem &= ~imm
    # NOTE(review): vorr and vbic pass an extra True to oneRegImmInst; since
    # their snippets read destElem before writing it, this presumably asks the
    # generator to load the destination first -- confirm against oneRegImmInst,
    # which is defined outside this section.
3207    vmovCode = 'destElem = imm;'
3208    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3209    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3210
3211    vorrCode = 'destElem |= imm;'
3212    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3213    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3214
3215    vmvnCode = 'destElem = ~imm;'
3216    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3217    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3218
3219    vbicCode = 'destElem &= ~imm;'
3220    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3221    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3222
    # Saturating narrowing moves.  Each snippet first assigns srcElem1 to the
    # narrower destElem and then checks whether the value survived; if not it
    # sets fpscr.qc and substitutes a saturated value.  FPSCR is read from and
    # written back to FpscrQc.
    # NOTE(review): mask(n) is assumed to yield the low n bits set; the helper
    # is defined elsewhere.
    #
    # vqmovn (signed types): on overflow destElem = mask(bits - 1), inverted
    # when srcElem1 is negative.
3223    vqmovnCode = '''
3224    FPSCR fpscr = (FPSCR) FpscrQc;
3225    destElem = srcElem1;
3226    if ((BigElement)destElem != srcElem1) {
3227        fpscr.qc = 1;
3228        destElem = mask(sizeof(Element) * 8 - 1);
3229        if (srcElem1 < 0)
3230            destElem = ~destElem;
3231    }
3232    FpscrQc = fpscr;
3233    '''
3234    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3235
    # vqmovun registered for unsigned types: on overflow
    # destElem = mask(bits).
3236    vqmovunCode = '''
3237    FPSCR fpscr = (FPSCR) FpscrQc;
3238    destElem = srcElem1;
3239    if ((BigElement)destElem != srcElem1) {
3240        fpscr.qc = 1;
3241        destElem = mask(sizeof(Element) * 8);
3242    }
3243    FpscrQc = fpscr;
3244    '''
3245    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3246            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3247
    # Same "vqmovun" mnemonic, registered as NVqmovuns for signed types:
    # saturates when srcElem1 is negative or when the low bits of destElem do
    # not reproduce srcElem1.  The saturated value is mask(bits), inverted
    # when srcElem1 is negative.
3248    vqmovunsCode = '''
3249    FPSCR fpscr = (FPSCR) FpscrQc;
3250    destElem = srcElem1;
3251    if (srcElem1 < 0 ||
3252            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3253        fpscr.qc = 1;
3254        destElem = mask(sizeof(Element) * 8);
3255        if (srcElem1 < 0)
3256            destElem = ~destElem;
3257    }
3258    FpscrQc = fpscr;
3259    '''
3260    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3261            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3262
3263    def buildVext(name, Name, opClass, types, rCount, op):
3264        global header_output, exec_output
3265        eWalkCode = '''
3266        RegVect srcReg1, srcReg2, destReg;
3267        '''
3268        for reg in range(rCount):
3269            eWalkCode += simdEnabledCheckCode + '''
3270                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3271                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3272            ''' % { "reg" : reg }
3273        eWalkCode += op
3274        for reg in range(rCount):
3275            eWalkCode += '''
3276            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3277            ''' % { "reg" : reg }
3278        iop = InstObjParams(name, Name,
3279                            "RegRegRegImmOp",
3280                            { "code": eWalkCode,
3281                              "r_count": rCount,
3282                              "predicate_test": predicateTest,
3283                              "op_class": opClass }, [])
3284        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3285        exec_output += NeonEqualRegExecute.subst(iop)
3286        for type in types:
3287            substDict = { "targs" : type,
3288                          "class_name" : Name }
3289            exec_output += NeonExecDeclare.subst(substDict)
3290
    # vext: destReg element i is taken from position i + imm of the
    # concatenation srcReg1:srcReg2 -- from srcReg1 while the index is below
    # eCount, otherwise from srcReg2 at index - eCount.  If the index is still
    # out of range after that adjustment, the snippet raises
    # UndefinedInstruction (constructed differently depending on FullSystem)
    # and leaves that destination element unwritten.
3291    vextCode = '''
3292        for (unsigned i = 0; i < eCount; i++) {
3293            unsigned index = i + imm;
3294            if (index < eCount) {
3295                destReg.elements[i] = srcReg1.elements[index];
3296            } else {
3297                index -= eCount;
3298                if (index >= eCount) {
3299                    if (FullSystem)
3300                        fault = new UndefinedInstruction;
3301                    else
3302                        fault = new UndefinedInstruction(false, mnemonic);
3303                } else {
3304                    destReg.elements[i] = srcReg2.elements[index];
3305                }
3306            }
3307        }
3308    '''
3309    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3310    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3311
3312    def buildVtbxl(name, Name, opClass, length, isVtbl):
3313        global header_output, decoder_output, exec_output
3314        code = '''
3315            union
3316            {
3317                uint8_t bytes[32];
3318                FloatRegBits regs[8];
3319            } table;
3320
3321            union
3322            {
3323                uint8_t bytes[8];
3324                FloatRegBits regs[2];
3325            } destReg, srcReg2;
3326
3327            const unsigned length = %(length)d;
3328            const bool isVtbl = %(isVtbl)s;
3329
3330            srcReg2.regs[0] = htog(FpOp2P0_uw);
3331            srcReg2.regs[1] = htog(FpOp2P1_uw);
3332
3333            destReg.regs[0] = htog(FpDestP0_uw);
3334            destReg.regs[1] = htog(FpDestP1_uw);
3335        ''' % { "length" : length, "isVtbl" : isVtbl }
3336        for reg in range(8):
3337            if reg < length * 2:
3338                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3339                        { "reg" : reg }
3340            else:
3341                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3342        code += '''
3343        for (unsigned i = 0; i < sizeof(destReg); i++) {
3344            uint8_t index = srcReg2.bytes[i];
3345            if (index < 8 * length) {
3346                destReg.bytes[i] = table.bytes[index];
3347            } else {
3348                if (isVtbl)
3349                    destReg.bytes[i] = 0;
3350                // else destReg.bytes[i] unchanged
3351            }
3352        }
3353
3354        FpDestP0_uw = gtoh(destReg.regs[0]);
3355        FpDestP1_uw = gtoh(destReg.regs[1]);
3356        '''
3357        iop = InstObjParams(name, Name,
3358                            "RegRegRegOp",
3359                            { "code": code,
3360                              "predicate_test": predicateTest,
3361                              "op_class": opClass }, [])
3362        header_output += RegRegRegOpDeclare.subst(iop)
3363        decoder_output += RegRegRegOpConstructor.subst(iop)
3364        exec_output += PredOpExecute.subst(iop)
3365
    # Instantiate the table lookups for table lengths 1 through 4.  The last
    # argument is the text of a C++ bool literal substituted into the
    # generated code: "true" for vtbl, "false" for vtbx.
3366    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3367    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3368    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3369    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3370
3371    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3372    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3373    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3374    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3375}};
3376