neon.isa revision 8588:ef28ed90449d
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                         ExtMachInst machInst, IntRegIndex dest,
98                         IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                         ExtMachInst machInst, IntRegIndex dest,
116                         IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
133                           ExtMachInst machInst, IntRegIndex dest,
134                           IntRegIndex op1, IntRegIndex op2)
135    {
136        if (notSigned) {
137            return decodeNeonUThreeUSReg<Base>(
138                    size, machInst, dest, op1, op2);
139        } else {
140            return decodeNeonSThreeUSReg<Base>(
141                    size, machInst, dest, op1, op2);
142        }
143    }
144
145    template <template <typename T> class BaseD,
146              template <typename T> class BaseQ>
147    StaticInstPtr
148    decodeNeonUThreeSReg(bool q, unsigned size,
149                         ExtMachInst machInst, IntRegIndex dest,
150                         IntRegIndex op1, IntRegIndex op2)
151    {
152        if (q) {
153            return decodeNeonUThreeUSReg<BaseQ>(
154                    size, machInst, dest, op1, op2);
155        } else {
156            return decodeNeonUThreeUSReg<BaseD>(
157                    size, machInst, dest, op1, op2);
158        }
159    }
160
161    template <template <typename T> class BaseD,
162              template <typename T> class BaseQ>
163    StaticInstPtr
164    decodeNeonSThreeSReg(bool q, unsigned size,
165                         ExtMachInst machInst, IntRegIndex dest,
166                         IntRegIndex op1, IntRegIndex op2)
167    {
168        if (q) {
169            return decodeNeonSThreeUSReg<BaseQ>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<BaseD>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
181                          ExtMachInst machInst, IntRegIndex dest,
182                          IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeSReg<BaseD, BaseQ>(
186                    q, size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeSReg<BaseD, BaseQ>(
189                    q, size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeReg(bool q, unsigned size,
197                        ExtMachInst machInst, IntRegIndex dest,
198                        IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeReg(bool q, unsigned size,
213                        ExtMachInst machInst, IntRegIndex dest,
214                        IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (notSigned) {
233            return decodeNeonUThreeReg<BaseD, BaseQ>(
234                    q, size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeReg<BaseD, BaseQ>(
237                    q, size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUTwoShiftReg(bool q, unsigned size,
245                           ExtMachInst machInst, IntRegIndex dest,
246                           IntRegIndex op1, uint64_t imm)
247    {
248        if (q) {
249            switch (size) {
250              case 0:
251                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
252              case 1:
253                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
254              case 2:
255                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
256              case 3:
257                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
258              default:
259                return new Unknown(machInst);
260            }
261        } else {
262            switch (size) {
263              case 0:
264                return new BaseD<uint8_t>(machInst, dest, op1, imm);
265              case 1:
266                return new BaseD<uint16_t>(machInst, dest, op1, imm);
267              case 2:
268                return new BaseD<uint32_t>(machInst, dest, op1, imm);
269              case 3:
270                return new BaseD<uint64_t>(machInst, dest, op1, imm);
271              default:
272                return new Unknown(machInst);
273            }
274        }
275    }
276
277    template <template <typename T> class BaseD,
278              template <typename T> class BaseQ>
279    StaticInstPtr
280    decodeNeonSTwoShiftReg(bool q, unsigned size,
281                           ExtMachInst machInst, IntRegIndex dest,
282                           IntRegIndex op1, uint64_t imm)
283    {
284        if (q) {
285            switch (size) {
286              case 0:
287                return new BaseQ<int8_t>(machInst, dest, op1, imm);
288              case 1:
289                return new BaseQ<int16_t>(machInst, dest, op1, imm);
290              case 2:
291                return new BaseQ<int32_t>(machInst, dest, op1, imm);
292              case 3:
293                return new BaseQ<int64_t>(machInst, dest, op1, imm);
294              default:
295                return new Unknown(machInst);
296            }
297        } else {
298            switch (size) {
299              case 0:
300                return new BaseD<int8_t>(machInst, dest, op1, imm);
301              case 1:
302                return new BaseD<int16_t>(machInst, dest, op1, imm);
303              case 2:
304                return new BaseD<int32_t>(machInst, dest, op1, imm);
305              case 3:
306                return new BaseD<int64_t>(machInst, dest, op1, imm);
307              default:
308                return new Unknown(machInst);
309            }
310        }
311    }
312
313
314    template <template <typename T> class BaseD,
315              template <typename T> class BaseQ>
316    StaticInstPtr
317    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
318                            ExtMachInst machInst, IntRegIndex dest,
319                            IntRegIndex op1, uint64_t imm)
320    {
321        if (notSigned) {
322            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
323                    q, size, machInst, dest, op1, imm);
324        } else {
325            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
326                    q, size, machInst, dest, op1, imm);
327        }
328    }
329
330    template <template <typename T> class Base>
331    StaticInstPtr
332    decodeNeonUTwoShiftUSReg(unsigned size,
333                             ExtMachInst machInst, IntRegIndex dest,
334                             IntRegIndex op1, uint64_t imm)
335    {
336        switch (size) {
337          case 0:
338            return new Base<uint8_t>(machInst, dest, op1, imm);
339          case 1:
340            return new Base<uint16_t>(machInst, dest, op1, imm);
341          case 2:
342            return new Base<uint32_t>(machInst, dest, op1, imm);
343          default:
344            return new Unknown(machInst);
345        }
346    }
347
348    template <template <typename T> class BaseD,
349              template <typename T> class BaseQ>
350    StaticInstPtr
351    decodeNeonUTwoShiftSReg(bool q, unsigned size,
352                            ExtMachInst machInst, IntRegIndex dest,
353                            IntRegIndex op1, uint64_t imm)
354    {
355        if (q) {
356            return decodeNeonUTwoShiftUSReg<BaseQ>(
357                    size, machInst, dest, op1, imm);
358        } else {
359            return decodeNeonUTwoShiftUSReg<BaseD>(
360                    size, machInst, dest, op1, imm);
361        }
362    }
363
364    template <template <typename T> class Base>
365    StaticInstPtr
366    decodeNeonSTwoShiftUSReg(unsigned size,
367                             ExtMachInst machInst, IntRegIndex dest,
368                             IntRegIndex op1, uint64_t imm)
369    {
370        switch (size) {
371          case 0:
372            return new Base<int8_t>(machInst, dest, op1, imm);
373          case 1:
374            return new Base<int16_t>(machInst, dest, op1, imm);
375          case 2:
376            return new Base<int32_t>(machInst, dest, op1, imm);
377          default:
378            return new Unknown(machInst);
379        }
380    }
381
382    template <template <typename T> class BaseD,
383              template <typename T> class BaseQ>
384    StaticInstPtr
385    decodeNeonSTwoShiftSReg(bool q, unsigned size,
386                            ExtMachInst machInst, IntRegIndex dest,
387                            IntRegIndex op1, uint64_t imm)
388    {
389        if (q) {
390            return decodeNeonSTwoShiftUSReg<BaseQ>(
391                    size, machInst, dest, op1, imm);
392        } else {
393            return decodeNeonSTwoShiftUSReg<BaseD>(
394                    size, machInst, dest, op1, imm);
395        }
396    }
397
398    template <template <typename T> class BaseD,
399              template <typename T> class BaseQ>
400    StaticInstPtr
401    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
402                             ExtMachInst machInst, IntRegIndex dest,
403                             IntRegIndex op1, uint64_t imm)
404    {
405        if (notSigned) {
406            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
407                    q, size, machInst, dest, op1, imm);
408        } else {
409            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
410                    q, size, machInst, dest, op1, imm);
411        }
412    }
413
414    template <template <typename T> class Base>
415    StaticInstPtr
416    decodeNeonUTwoMiscUSReg(unsigned size,
417                            ExtMachInst machInst, IntRegIndex dest,
418                            IntRegIndex op1)
419    {
420        switch (size) {
421          case 0:
422            return new Base<uint8_t>(machInst, dest, op1);
423          case 1:
424            return new Base<uint16_t>(machInst, dest, op1);
425          case 2:
426            return new Base<uint32_t>(machInst, dest, op1);
427          default:
428            return new Unknown(machInst);
429        }
430    }
431
432    template <template <typename T> class Base>
433    StaticInstPtr
434    decodeNeonSTwoMiscUSReg(unsigned size,
435                            ExtMachInst machInst, IntRegIndex dest,
436                            IntRegIndex op1)
437    {
438        switch (size) {
439          case 0:
440            return new Base<int8_t>(machInst, dest, op1);
441          case 1:
442            return new Base<int16_t>(machInst, dest, op1);
443          case 2:
444            return new Base<int32_t>(machInst, dest, op1);
445          default:
446            return new Unknown(machInst);
447        }
448    }
449
450    template <template <typename T> class BaseD,
451              template <typename T> class BaseQ>
452    StaticInstPtr
453    decodeNeonUTwoMiscSReg(bool q, unsigned size,
454                          ExtMachInst machInst, IntRegIndex dest,
455                          IntRegIndex op1)
456    {
457        if (q) {
458            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
459        } else {
460            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
461        }
462    }
463
464    template <template <typename T> class BaseD,
465              template <typename T> class BaseQ>
466    StaticInstPtr
467    decodeNeonSTwoMiscSReg(bool q, unsigned size,
468                          ExtMachInst machInst, IntRegIndex dest,
469                          IntRegIndex op1)
470    {
471        if (q) {
472            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
473        } else {
474            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
475        }
476    }
477
478    template <template <typename T> class Base>
479    StaticInstPtr
480    decodeNeonUTwoMiscUReg(unsigned size,
481                           ExtMachInst machInst, IntRegIndex dest,
482                           IntRegIndex op1)
483    {
484        switch (size) {
485          case 0:
486            return new Base<uint8_t>(machInst, dest, op1);
487          case 1:
488            return new Base<uint16_t>(machInst, dest, op1);
489          case 2:
490            return new Base<uint32_t>(machInst, dest, op1);
491          case 3:
492            return new Base<uint64_t>(machInst, dest, op1);
493          default:
494            return new Unknown(machInst);
495        }
496    }
497
498    template <template <typename T> class Base>
499    StaticInstPtr
500    decodeNeonSTwoMiscUReg(unsigned size,
501                            ExtMachInst machInst, IntRegIndex dest,
502                            IntRegIndex op1)
503    {
504        switch (size) {
505          case 0:
506            return new Base<int8_t>(machInst, dest, op1);
507          case 1:
508            return new Base<int16_t>(machInst, dest, op1);
509          case 2:
510            return new Base<int32_t>(machInst, dest, op1);
511          case 3:
512            return new Base<int64_t>(machInst, dest, op1);
513          default:
514            return new Unknown(machInst);
515        }
516    }
517
518    template <template <typename T> class BaseD,
519              template <typename T> class BaseQ>
520    StaticInstPtr
521    decodeNeonSTwoMiscReg(bool q, unsigned size,
522                          ExtMachInst machInst, IntRegIndex dest,
523                          IntRegIndex op1)
524    {
525        if (q) {
526            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
527        } else {
528            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
529        }
530    }
531
532    template <template <typename T> class BaseD,
533              template <typename T> class BaseQ>
534    StaticInstPtr
535    decodeNeonUTwoMiscReg(bool q, unsigned size,
536                          ExtMachInst machInst, IntRegIndex dest,
537                          IntRegIndex op1)
538    {
539        if (q) {
540            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
541        } else {
542            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
543        }
544    }
545
546    template <template <typename T> class BaseD,
547              template <typename T> class BaseQ>
548    StaticInstPtr
549    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
550                            ExtMachInst machInst, IntRegIndex dest,
551                            IntRegIndex op1)
552    {
553        if (notSigned) {
554            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
555                    q, size, machInst, dest, op1);
556        } else {
557            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
558                    q, size, machInst, dest, op1);
559        }
560    }
561
562}};
563
564output exec {{
565    static float
566    vcgtFunc(float op1, float op2)
567    {
568        if (isSnan(op1) || isSnan(op2))
569            return 2.0;
570        return (op1 > op2) ? 0.0 : 1.0;
571    }
572
573    static float
574    vcgeFunc(float op1, float op2)
575    {
576        if (isSnan(op1) || isSnan(op2))
577            return 2.0;
578        return (op1 >= op2) ? 0.0 : 1.0;
579    }
580
581    static float
582    vceqFunc(float op1, float op2)
583    {
584        if (isSnan(op1) || isSnan(op2))
585            return 2.0;
586        return (op1 == op2) ? 0.0 : 1.0;
587    }
588
589    static float
590    vcleFunc(float op1, float op2)
591    {
592        if (isSnan(op1) || isSnan(op2))
593            return 2.0;
594        return (op1 <= op2) ? 0.0 : 1.0;
595    }
596
597    static float
598    vcltFunc(float op1, float op2)
599    {
600        if (isSnan(op1) || isSnan(op2))
601            return 2.0;
602        return (op1 < op2) ? 0.0 : 1.0;
603    }
604
605    static float
606    vacgtFunc(float op1, float op2)
607    {
608        if (isSnan(op1) || isSnan(op2))
609            return 2.0;
610        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
611    }
612
613    static float
614    vacgeFunc(float op1, float op2)
615    {
616        if (isSnan(op1) || isSnan(op2))
617            return 2.0;
618        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
619    }
620}};
621
622let {{
623
624    header_output = ""
625    exec_output = ""
626
627    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
628    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
629    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
630    signedTypes = smallSignedTypes + ("int64_t",)
631    smallTypes = smallUnsignedTypes + smallSignedTypes
632    allTypes = unsignedTypes + signedTypes
633
634    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
635                          readDest=False, pairwise=False):
636        global header_output, exec_output
637        eWalkCode = simdEnabledCheckCode + '''
638        RegVect srcReg1, srcReg2, destReg;
639        '''
640        for reg in range(rCount):
641            eWalkCode += '''
642                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
643                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
644            ''' % { "reg" : reg }
645            if readDest:
646                eWalkCode += '''
647                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
648                ''' % { "reg" : reg }
649        readDestCode = ''
650        if readDest:
651            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
652        if pairwise:
653            eWalkCode += '''
654            for (unsigned i = 0; i < eCount; i++) {
655                Element srcElem1 = gtoh(2 * i < eCount ?
656                                        srcReg1.elements[2 * i] :
657                                        srcReg2.elements[2 * i - eCount]);
658                Element srcElem2 = gtoh(2 * i < eCount ?
659                                        srcReg1.elements[2 * i + 1] :
660                                        srcReg2.elements[2 * i + 1 - eCount]);
661                Element destElem;
662                %(readDest)s
663                %(op)s
664                destReg.elements[i] = htog(destElem);
665            }
666            ''' % { "op" : op, "readDest" : readDestCode }
667        else:
668            eWalkCode += '''
669            for (unsigned i = 0; i < eCount; i++) {
670                Element srcElem1 = gtoh(srcReg1.elements[i]);
671                Element srcElem2 = gtoh(srcReg2.elements[i]);
672                Element destElem;
673                %(readDest)s
674                %(op)s
675                destReg.elements[i] = htog(destElem);
676            }
677            ''' % { "op" : op, "readDest" : readDestCode }
678        for reg in range(rCount):
679            eWalkCode += '''
680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
681            ''' % { "reg" : reg }
682        iop = InstObjParams(name, Name,
683                            "RegRegRegOp",
684                            { "code": eWalkCode,
685                              "r_count": rCount,
686                              "predicate_test": predicateTest,
687                              "op_class": opClass }, [])
688        header_output += NeonRegRegRegOpDeclare.subst(iop)
689        exec_output += NeonEqualRegExecute.subst(iop)
690        for type in types:
691            substDict = { "targs" : type,
692                          "class_name" : Name }
693            exec_output += NeonExecDeclare.subst(substDict)
694
695    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
696                            readDest=False, pairwise=False, toInt=False):
697        global header_output, exec_output
698        eWalkCode = simdEnabledCheckCode + '''
699        typedef FloatReg FloatVect[rCount];
700        FloatVect srcRegs1, srcRegs2;
701        '''
702        if toInt:
703            eWalkCode += 'RegVect destRegs;\n'
704        else:
705            eWalkCode += 'FloatVect destRegs;\n'
706        for reg in range(rCount):
707            eWalkCode += '''
708                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
709                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
710            ''' % { "reg" : reg }
711            if readDest:
712                if toInt:
713                    eWalkCode += '''
714                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
715                    ''' % { "reg" : reg }
716                else:
717                    eWalkCode += '''
718                        destRegs[%(reg)d] = FpDestP%(reg)d;
719                    ''' % { "reg" : reg }
720        readDestCode = ''
721        if readDest:
722            readDestCode = 'destReg = destRegs[r];'
723        destType = 'FloatReg'
724        writeDest = 'destRegs[r] = destReg;'
725        if toInt:
726            destType = 'FloatRegBits'
727            writeDest = 'destRegs.regs[r] = destReg;'
728        if pairwise:
729            eWalkCode += '''
730            for (unsigned r = 0; r < rCount; r++) {
731                FloatReg srcReg1 = (2 * r < rCount) ?
732                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
733                FloatReg srcReg2 = (2 * r < rCount) ?
734                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
735                %(destType)s destReg;
736                %(readDest)s
737                %(op)s
738                %(writeDest)s
739            }
740            ''' % { "op" : op,
741                    "readDest" : readDestCode,
742                    "destType" : destType,
743                    "writeDest" : writeDest }
744        else:
745            eWalkCode += '''
746            for (unsigned r = 0; r < rCount; r++) {
747                FloatReg srcReg1 = srcRegs1[r];
748                FloatReg srcReg2 = srcRegs2[r];
749                %(destType)s destReg;
750                %(readDest)s
751                %(op)s
752                %(writeDest)s
753            }
754            ''' % { "op" : op,
755                    "readDest" : readDestCode,
756                    "destType" : destType,
757                    "writeDest" : writeDest }
758        for reg in range(rCount):
759            if toInt:
760                eWalkCode += '''
761                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
762                ''' % { "reg" : reg }
763            else:
764                eWalkCode += '''
765                FpDestP%(reg)d = destRegs[%(reg)d];
766                ''' % { "reg" : reg }
767        iop = InstObjParams(name, Name,
768                            "FpRegRegRegOp",
769                            { "code": eWalkCode,
770                              "r_count": rCount,
771                              "predicate_test": predicateTest,
772                              "op_class": opClass }, [])
773        header_output += NeonRegRegRegOpDeclare.subst(iop)
774        exec_output += NeonEqualRegExecute.subst(iop)
775        for type in types:
776            substDict = { "targs" : type,
777                          "class_name" : Name }
778            exec_output += NeonExecDeclare.subst(substDict)
779
780    def threeUnequalRegInst(name, Name, opClass, types, op,
781                            bigSrc1, bigSrc2, bigDest, readDest):
782        global header_output, exec_output
783        src1Cnt = src2Cnt = destCnt = 2
784        src1Prefix = src2Prefix = destPrefix = ''
785        if bigSrc1:
786            src1Cnt = 4
787            src1Prefix = 'Big'
788        if bigSrc2:
789            src2Cnt = 4
790            src2Prefix = 'Big'
791        if bigDest:
792            destCnt = 4
793            destPrefix = 'Big'
794        eWalkCode = simdEnabledCheckCode + '''
795            %sRegVect srcReg1;
796            %sRegVect srcReg2;
797            %sRegVect destReg;
798        ''' % (src1Prefix, src2Prefix, destPrefix)
799        for reg in range(src1Cnt):
800            eWalkCode += '''
801                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
802            ''' % { "reg" : reg }
803        for reg in range(src2Cnt):
804            eWalkCode += '''
805                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
806            ''' % { "reg" : reg }
807        if readDest:
808            for reg in range(destCnt):
809                eWalkCode += '''
810                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
811                ''' % { "reg" : reg }
812        readDestCode = ''
813        if readDest:
814            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
815        eWalkCode += '''
816        for (unsigned i = 0; i < eCount; i++) {
817            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
818            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
819            %(destPrefix)sElement destElem;
820            %(readDest)s
821            %(op)s
822            destReg.elements[i] = htog(destElem);
823        }
824        ''' % { "op" : op, "readDest" : readDestCode,
825                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
826                "destPrefix" : destPrefix }
827        for reg in range(destCnt):
828            eWalkCode += '''
829            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
830            ''' % { "reg" : reg }
831        iop = InstObjParams(name, Name,
832                            "RegRegRegOp",
833                            { "code": eWalkCode,
834                              "r_count": 2,
835                              "predicate_test": predicateTest,
836                              "op_class": opClass }, [])
837        header_output += NeonRegRegRegOpDeclare.subst(iop)
838        exec_output += NeonUnequalRegExecute.subst(iop)
839        for type in types:
840            substDict = { "targs" : type,
841                          "class_name" : Name }
842            exec_output += NeonExecDeclare.subst(substDict)
843
844    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
845        threeUnequalRegInst(name, Name, opClass, types, op,
846                            True, True, False, readDest)
847
848    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
849        threeUnequalRegInst(name, Name, opClass, types, op,
850                            False, False, True, readDest)
851
852    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
853        threeUnequalRegInst(name, Name, opClass, types, op,
854                            True, False, True, readDest)
855
856    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
857        global header_output, exec_output
858        eWalkCode = simdEnabledCheckCode + '''
859        RegVect srcReg1, srcReg2, destReg;
860        '''
861        for reg in range(rCount):
862            eWalkCode += '''
863                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
864                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
865            ''' % { "reg" : reg }
866            if readDest:
867                eWalkCode += '''
868                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
869                ''' % { "reg" : reg }
870        readDestCode = ''
871        if readDest:
872            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
873        eWalkCode += '''
874        if (imm < 0 && imm >= eCount) {
875#if FULL_SYSTEM
876            fault = new UndefinedInstruction;
877#else
878            fault = new UndefinedInstruction(false, mnemonic);
879#endif
880        } else {
881            for (unsigned i = 0; i < eCount; i++) {
882                Element srcElem1 = gtoh(srcReg1.elements[i]);
883                Element srcElem2 = gtoh(srcReg2.elements[imm]);
884                Element destElem;
885                %(readDest)s
886                %(op)s
887                destReg.elements[i] = htog(destElem);
888            }
889        }
890        ''' % { "op" : op, "readDest" : readDestCode }
891        for reg in range(rCount):
892            eWalkCode += '''
893            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
894            ''' % { "reg" : reg }
895        iop = InstObjParams(name, Name,
896                            "RegRegRegImmOp",
897                            { "code": eWalkCode,
898                              "r_count": rCount,
899                              "predicate_test": predicateTest,
900                              "op_class": opClass }, [])
901        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
902        exec_output += NeonEqualRegExecute.subst(iop)
903        for type in types:
904            substDict = { "targs" : type,
905                          "class_name" : Name }
906            exec_output += NeonExecDeclare.subst(substDict)
907
908    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
909        global header_output, exec_output
910        rCount = 2
911        eWalkCode = simdEnabledCheckCode + '''
912        RegVect srcReg1, srcReg2;
913        BigRegVect destReg;
914        '''
915        for reg in range(rCount):
916            eWalkCode += '''
917                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
918                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
919            ''' % { "reg" : reg }
920        if readDest:
921            for reg in range(2 * rCount):
922                eWalkCode += '''
923                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
924                ''' % { "reg" : reg }
925        readDestCode = ''
926        if readDest:
927            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
928        eWalkCode += '''
929        if (imm < 0 && imm >= eCount) {
930#if FULL_SYSTEM
931            fault = new UndefinedInstruction;
932#else
933            fault = new UndefinedInstruction(false, mnemonic);
934#endif
935        } else {
936            for (unsigned i = 0; i < eCount; i++) {
937                Element srcElem1 = gtoh(srcReg1.elements[i]);
938                Element srcElem2 = gtoh(srcReg2.elements[imm]);
939                BigElement destElem;
940                %(readDest)s
941                %(op)s
942                destReg.elements[i] = htog(destElem);
943            }
944        }
945        ''' % { "op" : op, "readDest" : readDestCode }
946        for reg in range(2 * rCount):
947            eWalkCode += '''
948            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
949            ''' % { "reg" : reg }
950        iop = InstObjParams(name, Name,
951                            "RegRegRegImmOp",
952                            { "code": eWalkCode,
953                              "r_count": rCount,
954                              "predicate_test": predicateTest,
955                              "op_class": opClass }, [])
956        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
957        exec_output += NeonUnequalRegExecute.subst(iop)
958        for type in types:
959            substDict = { "targs" : type,
960                          "class_name" : Name }
961            exec_output += NeonExecDeclare.subst(substDict)
962
963    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
964        global header_output, exec_output
965        eWalkCode = simdEnabledCheckCode + '''
966        typedef FloatReg FloatVect[rCount];
967        FloatVect srcRegs1, srcRegs2, destRegs;
968        '''
969        for reg in range(rCount):
970            eWalkCode += '''
971                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
972                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
973            ''' % { "reg" : reg }
974            if readDest:
975                eWalkCode += '''
976                    destRegs[%(reg)d] = FpDestP%(reg)d;
977                ''' % { "reg" : reg }
978        readDestCode = ''
979        if readDest:
980            readDestCode = 'destReg = destRegs[i];'
981        eWalkCode += '''
982        if (imm < 0 && imm >= eCount) {
983#if FULL_SYSTEM
984            fault = new UndefinedInstruction;
985#else
986            fault = new UndefinedInstruction(false, mnemonic);
987#endif
988        } else {
989            for (unsigned i = 0; i < rCount; i++) {
990                FloatReg srcReg1 = srcRegs1[i];
991                FloatReg srcReg2 = srcRegs2[imm];
992                FloatReg destReg;
993                %(readDest)s
994                %(op)s
995                destRegs[i] = destReg;
996            }
997        }
998        ''' % { "op" : op, "readDest" : readDestCode }
999        for reg in range(rCount):
1000            eWalkCode += '''
1001            FpDestP%(reg)d = destRegs[%(reg)d];
1002            ''' % { "reg" : reg }
1003        iop = InstObjParams(name, Name,
1004                            "FpRegRegRegImmOp",
1005                            { "code": eWalkCode,
1006                              "r_count": rCount,
1007                              "predicate_test": predicateTest,
1008                              "op_class": opClass }, [])
1009        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1010        exec_output += NeonEqualRegExecute.subst(iop)
1011        for type in types:
1012            substDict = { "targs" : type,
1013                          "class_name" : Name }
1014            exec_output += NeonExecDeclare.subst(substDict)
1015
1016    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1017            readDest=False, toInt=False, fromInt=False):
        # Generate a one-source instruction using the "RegRegImmOp" base
        # class, with source and destination both spanning rCount registers.
        #   op       -- C++ snippet run once per loop iteration.
        #   readDest -- when true, the destination registers are loaded and
        #               the destination variable is initialised before op.
        #   toInt    -- when true, the destination is handled as a whole
        #               FloatRegBits register (destReg) instead of an
        #               Element (destElem).
        #   fromInt  -- when true, the source is read as a whole
        #               FloatRegBits register (srcReg1) instead of an
        #               Element (srcElem1).
        # The immediate is not referenced here; presumably op uses it.
1018        global header_output, exec_output
1019        eWalkCode = simdEnabledCheckCode + '''
1020        RegVect srcRegs1, destRegs;
1021        '''
1022        for reg in range(rCount):
1023            eWalkCode += '''
1024                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1025            ''' % { "reg" : reg }
1026            if readDest:
1027                eWalkCode += '''
1028                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1029                ''' % { "reg" : reg }
        # Choose the per-iteration read/declare/write statements according
        # to the toInt/fromInt flags; the defaults work on elements.
1030        readDestCode = ''
1031        if readDest:
1032            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1033            if toInt:
1034                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1035        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1036        if fromInt:
1037            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1038        declDest = 'Element destElem;'
1039        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1040        if toInt:
1041            declDest = 'FloatRegBits destReg;'
1042            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
        # NOTE(review): the loop bound is eCount even when regs[i] is
        # indexed; presumably eCount equals rCount for those types -- confirm.
1043        eWalkCode += '''
1044        for (unsigned i = 0; i < eCount; i++) {
1045            %(readOp)s
1046            %(declDest)s
1047            %(readDest)s
1048            %(op)s
1049            %(writeDest)s
1050        }
1051        ''' % { "readOp" : readOpCode,
1052                "declDest" : declDest,
1053                "readDest" : readDestCode,
1054                "op" : op,
1055                "writeDest" : writeDestCode }
1056        for reg in range(rCount):
1057            eWalkCode += '''
1058            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1059            ''' % { "reg" : reg }
1060        iop = InstObjParams(name, Name,
1061                            "RegRegImmOp",
1062                            { "code": eWalkCode,
1063                              "r_count": rCount,
1064                              "predicate_test": predicateTest,
1065                              "op_class": opClass }, [])
1066        header_output += NeonRegRegImmOpDeclare.subst(iop)
1067        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1068        for type in types:
1069            substDict = { "targs" : type,
1070                          "class_name" : Name }
1071            exec_output += NeonExecDeclare.subst(substDict)
1072
1073    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
        # Generate a one-source "RegRegImmOp" instruction that reads
        # BigElement values from four source registers and writes Element
        # values to two destination registers.
        #   op       -- C++ snippet run per element; expected to set destElem
        #               from srcElem1.
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
1074        global header_output, exec_output
1075        eWalkCode = simdEnabledCheckCode + '''
1076        BigRegVect srcReg1;
1077        RegVect destReg;
1078        '''
1079        for reg in range(4):
1080            eWalkCode += '''
1081                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1082            ''' % { "reg" : reg }
1083        if readDest:
1084            for reg in range(2):
1085                eWalkCode += '''
1086                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1087                ''' % { "reg" : reg }
1088        readDestCode = ''
1089        if readDest:
1090            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1091        eWalkCode += '''
1092        for (unsigned i = 0; i < eCount; i++) {
1093            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1094            Element destElem;
1095            %(readDest)s
1096            %(op)s
1097            destReg.elements[i] = htog(destElem);
1098        }
1099        ''' % { "op" : op, "readDest" : readDestCode }
1100        for reg in range(2):
1101            eWalkCode += '''
1102            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1103            ''' % { "reg" : reg }
1104        iop = InstObjParams(name, Name,
1105                            "RegRegImmOp",
1106                            { "code": eWalkCode,
1107                              "r_count": 2,
1108                              "predicate_test": predicateTest,
1109                              "op_class": opClass }, [])
1110        header_output += NeonRegRegImmOpDeclare.subst(iop)
1111        exec_output += NeonUnequalRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1112        for type in types:
1113            substDict = { "targs" : type,
1114                          "class_name" : Name }
1115            exec_output += NeonExecDeclare.subst(substDict)
1116
1117    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1118        global header_output, exec_output
1119        eWalkCode = simdEnabledCheckCode + '''
1120        RegVect srcReg1;
1121        BigRegVect destReg;
1122        '''
1123        for reg in range(2):
1124            eWalkCode += '''
1125                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1126            ''' % { "reg" : reg }
1127        if readDest:
1128            for reg in range(4):
1129                eWalkCode += '''
1130                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1131                ''' % { "reg" : reg }
1132        readDestCode = ''
1133        if readDest:
1134            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1135        eWalkCode += '''
1136        for (unsigned i = 0; i < eCount; i++) {
1137            Element srcElem1 = gtoh(srcReg1.elements[i]);
1138            BigElement destElem;
1139            %(readDest)s
1140            %(op)s
1141            destReg.elements[i] = htog(destElem);
1142        }
1143        ''' % { "op" : op, "readDest" : readDestCode }
1144        for reg in range(4):
1145            eWalkCode += '''
1146            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1147            ''' % { "reg" : reg }
1148        iop = InstObjParams(name, Name,
1149                            "RegRegImmOp",
1150                            { "code": eWalkCode,
1151                              "r_count": 2,
1152                              "predicate_test": predicateTest,
1153                              "op_class": opClass }, [])
1154        header_output += NeonRegRegImmOpDeclare.subst(iop)
1155        exec_output += NeonUnequalRegExecute.subst(iop)
1156        for type in types:
1157            substDict = { "targs" : type,
1158                          "class_name" : Name }
1159            exec_output += NeonExecDeclare.subst(substDict)
1160
1161    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a one-source "RegRegOp" instruction whose source and
        # destination both use RegVect and span rCount registers.
        #   op       -- C++ snippet run per element; expected to set destElem
        #               from srcElem1.
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
        # The result is stored at index j, which starts equal to i;
        # presumably op may reassign j to redirect the store -- confirm
        # against callers.
1162        global header_output, exec_output
1163        eWalkCode = simdEnabledCheckCode + '''
1164        RegVect srcReg1, destReg;
1165        '''
1166        for reg in range(rCount):
1167            eWalkCode += '''
1168                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1169            ''' % { "reg" : reg }
1170            if readDest:
1171                eWalkCode += '''
1172                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1173                ''' % { "reg" : reg }
1174        readDestCode = ''
1175        if readDest:
1176            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1177        eWalkCode += '''
1178        for (unsigned i = 0; i < eCount; i++) {
1179            unsigned j = i;
1180            Element srcElem1 = gtoh(srcReg1.elements[i]);
1181            Element destElem;
1182            %(readDest)s
1183            %(op)s
1184            destReg.elements[j] = htog(destElem);
1185        }
1186        ''' % { "op" : op, "readDest" : readDestCode }
1187        for reg in range(rCount):
1188            eWalkCode += '''
1189            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1190            ''' % { "reg" : reg }
1191        iop = InstObjParams(name, Name,
1192                            "RegRegOp",
1193                            { "code": eWalkCode,
1194                              "r_count": rCount,
1195                              "predicate_test": predicateTest,
1196                              "op_class": opClass }, [])
1197        header_output += NeonRegRegOpDeclare.subst(iop)
1198        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1199        for type in types:
1200            substDict = { "targs" : type,
1201                          "class_name" : Name }
1202            exec_output += NeonExecDeclare.subst(substDict)
1203
1204    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a one-source "RegRegImmOp" instruction in which every
        # iteration reads the same source element, the one selected by imm,
        # and writes destination element i.
        #   op       -- C++ snippet run per element; expected to set destElem
        #               from srcElem1.
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
        # NOTE(review): imm is used as an index without a range check in
        # the generated code -- confirm the decoder guarantees it is valid.
1205        global header_output, exec_output
1206        eWalkCode = simdEnabledCheckCode + '''
1207        RegVect srcReg1, destReg;
1208        '''
1209        for reg in range(rCount):
1210            eWalkCode += '''
1211                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1212            ''' % { "reg" : reg }
1213            if readDest:
1214                eWalkCode += '''
1215                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1216                ''' % { "reg" : reg }
1217        readDestCode = ''
1218        if readDest:
1219            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1220        eWalkCode += '''
1221        for (unsigned i = 0; i < eCount; i++) {
1222            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1223            Element destElem;
1224            %(readDest)s
1225            %(op)s
1226            destReg.elements[i] = htog(destElem);
1227        }
1228        ''' % { "op" : op, "readDest" : readDestCode }
1229        for reg in range(rCount):
1230            eWalkCode += '''
1231            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1232            ''' % { "reg" : reg }
1233        iop = InstObjParams(name, Name,
1234                            "RegRegImmOp",
1235                            { "code": eWalkCode,
1236                              "r_count": rCount,
1237                              "predicate_test": predicateTest,
1238                              "op_class": opClass }, [])
1239        header_output += NeonRegRegImmOpDeclare.subst(iop)
1240        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1241        for type in types:
1242            substDict = { "targs" : type,
1243                          "class_name" : Name }
1244            exec_output += NeonExecDeclare.subst(substDict)
1245
1246    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a "RegRegOp" instruction in which op works on the whole
        # register vectors: both srcReg1 and destReg are loaded, op is
        # appended verbatim (it is not wrapped in an element loop), and both
        # vectors are then written back to FpDestP*/FpOp1P*.
        #   op       -- C++ snippet appended as-is after the loads.
        #   readDest -- has no effect on the generated statements here: the
        #               destination is always loaded, and the readDest
        #               branch below only appends whitespace.
1247        global header_output, exec_output
1248        eWalkCode = simdEnabledCheckCode + '''
1249        RegVect srcReg1, destReg;
1250        '''
1251        for reg in range(rCount):
1252            eWalkCode += '''
1253                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1254                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1255            ''' % { "reg" : reg }
1256            if readDest:
1257                eWalkCode += '''
1258                ''' % { "reg" : reg }
        # NOTE(review): readDestCode is assigned but never used below.
1259        readDestCode = ''
1260        if readDest:
1261            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1262        eWalkCode += op
        # The source operand is written back as well as the destination.
1263        for reg in range(rCount):
1264            eWalkCode += '''
1265            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1266            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1267            ''' % { "reg" : reg }
1268        iop = InstObjParams(name, Name,
1269                            "RegRegOp",
1270                            { "code": eWalkCode,
1271                              "r_count": rCount,
1272                              "predicate_test": predicateTest,
1273                              "op_class": opClass }, [])
1274        header_output += NeonRegRegOpDeclare.subst(iop)
1275        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1276        for type in types:
1277            substDict = { "targs" : type,
1278                          "class_name" : Name }
1279            exec_output += NeonExecDeclare.subst(substDict)
1280
1281    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1282            readDest=False, toInt=False):
1283        global header_output, exec_output
1284        eWalkCode = simdEnabledCheckCode + '''
1285        typedef FloatReg FloatVect[rCount];
1286        FloatVect srcRegs1;
1287        '''
1288        if toInt:
1289            eWalkCode += 'RegVect destRegs;\n'
1290        else:
1291            eWalkCode += 'FloatVect destRegs;\n'
1292        for reg in range(rCount):
1293            eWalkCode += '''
1294                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1295            ''' % { "reg" : reg }
1296            if readDest:
1297                if toInt:
1298                    eWalkCode += '''
1299                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1300                    ''' % { "reg" : reg }
1301                else:
1302                    eWalkCode += '''
1303                        destRegs[%(reg)d] = FpDestP%(reg)d;
1304                    ''' % { "reg" : reg }
1305        readDestCode = ''
1306        if readDest:
1307            readDestCode = 'destReg = destRegs[i];'
1308        destType = 'FloatReg'
1309        writeDest = 'destRegs[r] = destReg;'
1310        if toInt:
1311            destType = 'FloatRegBits'
1312            writeDest = 'destRegs.regs[r] = destReg;'
1313        eWalkCode += '''
1314        for (unsigned r = 0; r < rCount; r++) {
1315            FloatReg srcReg1 = srcRegs1[r];
1316            %(destType)s destReg;
1317            %(readDest)s
1318            %(op)s
1319            %(writeDest)s
1320        }
1321        ''' % { "op" : op,
1322                "readDest" : readDestCode,
1323                "destType" : destType,
1324                "writeDest" : writeDest }
1325        for reg in range(rCount):
1326            if toInt:
1327                eWalkCode += '''
1328                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1329                ''' % { "reg" : reg }
1330            else:
1331                eWalkCode += '''
1332                FpDestP%(reg)d = destRegs[%(reg)d];
1333                ''' % { "reg" : reg }
1334        iop = InstObjParams(name, Name,
1335                            "FpRegRegOp",
1336                            { "code": eWalkCode,
1337                              "r_count": rCount,
1338                              "predicate_test": predicateTest,
1339                              "op_class": opClass }, [])
1340        header_output += NeonRegRegOpDeclare.subst(iop)
1341        exec_output += NeonEqualRegExecute.subst(iop)
1342        for type in types:
1343            substDict = { "targs" : type,
1344                          "class_name" : Name }
1345            exec_output += NeonExecDeclare.subst(substDict)
1346
1347    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a one-source "RegRegOp" instruction that consumes the
        # source elements in adjacent pairs (2*i and 2*i+1) and produces one
        # BigElement per pair, so the loop runs eCount / 2 times.
        #   op       -- C++ snippet run per pair; expected to set destElem
        #               from srcElem1/srcElem2.
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
        # Only rCount destination registers are read and written, although
        # destReg is declared as a BigRegVect.
1348        global header_output, exec_output
1349        eWalkCode = simdEnabledCheckCode + '''
1350        RegVect srcRegs;
1351        BigRegVect destReg;
1352        '''
1353        for reg in range(rCount):
1354            eWalkCode += '''
1355                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1356            ''' % { "reg" : reg }
1357            if readDest:
1358                eWalkCode += '''
1359                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1360                ''' % { "reg" : reg }
1361        readDestCode = ''
1362        if readDest:
1363            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1364        eWalkCode += '''
1365        for (unsigned i = 0; i < eCount / 2; i++) {
1366            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1367            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1368            BigElement destElem;
1369            %(readDest)s
1370            %(op)s
1371            destReg.elements[i] = htog(destElem);
1372        }
1373        ''' % { "op" : op, "readDest" : readDestCode }
1374        for reg in range(rCount):
1375            eWalkCode += '''
1376            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1377            ''' % { "reg" : reg }
1378        iop = InstObjParams(name, Name,
1379                            "RegRegOp",
1380                            { "code": eWalkCode,
1381                              "r_count": rCount,
1382                              "predicate_test": predicateTest,
1383                              "op_class": opClass }, [])
1384        header_output += NeonRegRegOpDeclare.subst(iop)
1385        exec_output += NeonUnequalRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1386        for type in types:
1387            substDict = { "targs" : type,
1388                          "class_name" : Name }
1389            exec_output += NeonExecDeclare.subst(substDict)
1390
1391    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
        # Generate a one-source "RegRegOp" instruction that reads BigElement
        # values from four source registers and writes Element values to
        # two destination registers.
        #   op       -- C++ snippet run per element; expected to set destElem
        #               from srcElem1.
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
1392        global header_output, exec_output
1393        eWalkCode = simdEnabledCheckCode + '''
1394        BigRegVect srcReg1;
1395        RegVect destReg;
1396        '''
1397        for reg in range(4):
1398            eWalkCode += '''
1399                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1400            ''' % { "reg" : reg }
1401        if readDest:
1402            for reg in range(2):
1403                eWalkCode += '''
1404                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1405                ''' % { "reg" : reg }
1406        readDestCode = ''
1407        if readDest:
1408            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1409        eWalkCode += '''
1410        for (unsigned i = 0; i < eCount; i++) {
1411            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1412            Element destElem;
1413            %(readDest)s
1414            %(op)s
1415            destReg.elements[i] = htog(destElem);
1416        }
1417        ''' % { "op" : op, "readDest" : readDestCode }
1418        for reg in range(2):
1419            eWalkCode += '''
1420            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1421            ''' % { "reg" : reg }
1422        iop = InstObjParams(name, Name,
1423                            "RegRegOp",
1424                            { "code": eWalkCode,
1425                              "r_count": 2,
1426                              "predicate_test": predicateTest,
1427                              "op_class": opClass }, [])
1428        header_output += NeonRegRegOpDeclare.subst(iop)
1429        exec_output += NeonUnequalRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1430        for type in types:
1431            substDict = { "targs" : type,
1432                          "class_name" : Name }
1433            exec_output += NeonExecDeclare.subst(substDict)
1434
1435    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
        # Generate a "RegImmOp" instruction with no source registers: op
        # runs once per destination element across rCount registers.
        #   op       -- C++ snippet run per element; expected to set destElem
        #               (presumably from the immediate, which is not
        #               referenced in this function).
        #   readDest -- when true, the destination registers are loaded and
        #               destElem is initialised from them before op.
1436        global header_output, exec_output
1437        eWalkCode = simdEnabledCheckCode + '''
1438        RegVect destReg;
1439        '''
1440        if readDest:
1441            for reg in range(rCount):
1442                eWalkCode += '''
1443                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1444                ''' % { "reg" : reg }
1445        readDestCode = ''
1446        if readDest:
1447            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1448        eWalkCode += '''
1449        for (unsigned i = 0; i < eCount; i++) {
1450            Element destElem;
1451            %(readDest)s
1452            %(op)s
1453            destReg.elements[i] = htog(destElem);
1454        }
1455        ''' % { "op" : op, "readDest" : readDestCode }
1456        for reg in range(rCount):
1457            eWalkCode += '''
1458            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1459            ''' % { "reg" : reg }
1460        iop = InstObjParams(name, Name,
1461                            "RegImmOp",
1462                            { "code": eWalkCode,
1463                              "r_count": rCount,
1464                              "predicate_test": predicateTest,
1465                              "op_class": opClass }, [])
1466        header_output += NeonRegImmOpDeclare.subst(iop)
1467        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
1468        for type in types:
1469            substDict = { "targs" : type,
1470                          "class_name" : Name }
1471            exec_output += NeonExecDeclare.subst(substDict)
1472
1473    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1474        global header_output, exec_output
1475        eWalkCode = simdEnabledCheckCode + '''
1476        RegVect srcReg1;
1477        BigRegVect destReg;
1478        '''
1479        for reg in range(2):
1480            eWalkCode += '''
1481                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1482            ''' % { "reg" : reg }
1483        if readDest:
1484            for reg in range(4):
1485                eWalkCode += '''
1486                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1487                ''' % { "reg" : reg }
1488        readDestCode = ''
1489        if readDest:
1490            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1491        eWalkCode += '''
1492        for (unsigned i = 0; i < eCount; i++) {
1493            Element srcElem1 = gtoh(srcReg1.elements[i]);
1494            BigElement destElem;
1495            %(readDest)s
1496            %(op)s
1497            destReg.elements[i] = htog(destElem);
1498        }
1499        ''' % { "op" : op, "readDest" : readDestCode }
1500        for reg in range(4):
1501            eWalkCode += '''
1502            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1503            ''' % { "reg" : reg }
1504        iop = InstObjParams(name, Name,
1505                            "RegRegOp",
1506                            { "code": eWalkCode,
1507                              "r_count": 2,
1508                              "predicate_test": predicateTest,
1509                              "op_class": opClass }, [])
1510        header_output += NeonRegRegOpDeclare.subst(iop)
1511        exec_output += NeonUnequalRegExecute.subst(iop)
1512        for type in types:
1513            substDict = { "targs" : type,
1514                          "class_name" : Name }
1515            exec_output += NeonExecDeclare.subst(substDict)
1516
1517    vhaddCode = '''
1518        Element carryBit =
1519            (((unsigned)srcElem1 & 0x1) +
1520             ((unsigned)srcElem2 & 0x1)) >> 1;
1521        // Use division instead of a shift to ensure the sign extension works
1522        // right. The compiler will figure out if it can be a shift. Mask the
1523        // inputs so they get truncated correctly.
1524        destElem = (((srcElem1 & ~(Element)1) / 2) +
1525                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1526    '''
    # vhaddCode halves each source separately and adds the carry out of the
    # two low bits, so the full-width sum is never formed.  It is registered
    # under two class names with 2 and 4 as the fifth argument (presumably
    # the register count of the D and Q forms).
1527    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
1528    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
1529
1530    vrhaddCode = '''
1531        Element carryBit =
1532            (((unsigned)srcElem1 & 0x1) +
1533             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
1534        // Use division instead of a shift to ensure the sign extension works
1535        // right. The compiler will figure out if it can be a shift. Mask the
1536        // inputs so they get truncated correctly.
1537        destElem = (((srcElem1 & ~(Element)1) / 2) +
1538                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
1539    '''
    # vrhaddCode is the same computation with an extra 1 added to the
    # low-bit sum before it is shifted, which rounds the halved result up.
1540    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
1541    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
1542
1543    vhsubCode = '''
1544        Element barrowBit =
1545            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
1546        // Use division instead of a shift to ensure the sign extension works
1547        // right. The compiler will figure out if it can be a shift. Mask the
1548        // inputs so they get truncated correctly.
1549        destElem = (((srcElem1 & ~(Element)1) / 2) -
1550                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
1551    '''
    # vhsub (halving subtract): halves each source with its low bit cleared,
    # subtracts, then subtracts barrowBit (i.e. the borrow). The borrow
    # expression is non-zero only when the low-bit difference is negative
    # (srcElem1 low bit 0, srcElem2 low bit 1); it relies on >> of a negative
    # value keeping the sign. Registered as VhsubD / VhsubQ over allTypes.
1552    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
1553    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
1554
1555    vandCode = '''
1556        destElem = srcElem1 & srcElem2;
1557    '''
    # vand: element-wise bitwise AND of the two sources, registered as
    # VandD / VandQ over unsignedTypes.
1558    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
1559    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
1560
1561    vbicCode = '''
1562        destElem = srcElem1 & ~srcElem2;
1563    '''
    # vbic (bit clear): keeps the bits of srcElem1 that are not set in
    # srcElem2. Registered as VbicD / VbicQ over unsignedTypes.
1564    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
1565    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
1566
1567    vorrCode = '''
1568        destElem = srcElem1 | srcElem2;
1569    '''
    # vorr: element-wise bitwise OR of the two sources, registered as
    # VorrD / VorrQ over unsignedTypes.
1570    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
1571    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
1572
    # The register-to-register vmov classes reuse vorrCode (only the mnemonic,
    # class name and op class differ). NOTE(review): presumably the decoder
    # selects these when both source operands name the same register, making
    # the OR a plain copy -- confirm in the decoder.
1573    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
1574    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
1575
1576    vornCode = '''
1577        destElem = srcElem1 | ~srcElem2;
1578    '''
    # vorn: bitwise OR of srcElem1 with the complement of srcElem2.
    # Registered as VornD / VornQ over unsignedTypes.
1579    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
1580    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
1581
1582    veorCode = '''
1583        destElem = srcElem1 ^ srcElem2;
1584    '''
    # veor: element-wise bitwise exclusive OR of the two sources.
    # Registered as VeorD / VeorQ over unsignedTypes.
1585    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
1586    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
1587
1588    vbifCode = '''
1589        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
1590    '''
    # vbif: for every bit, keep the old destination bit where srcElem2 is 1
    # and take the srcElem1 bit where srcElem2 is 0. The snippet reads
    # destElem as an input; the trailing True is presumably the flag that
    # makes threeEqualRegInst load the destination first -- confirm.
1591    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
1592    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
1593    vbitCode = '''
1594        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
1595    '''
    # vbit: for every bit, take the srcElem1 bit where srcElem2 is 1 and keep
    # the old destination bit where srcElem2 is 0 (the mirror of vbifCode).
    # destElem is an input here too, hence the trailing True (presumably a
    # read-destination flag -- confirm).
1596    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
1597    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
1598    vbslCode = '''
1599        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
1600    '''
    # vbsl: the old destination acts as the selector -- each result bit comes
    # from srcElem1 where the destination bit is 1 and from srcElem2 where it
    # is 0. destElem is an input, hence the trailing True (presumably a
    # read-destination flag -- confirm).
1601    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
1602    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
1603
1604    vmaxCode = '''
1605        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
1606    '''
    # vmax: element-wise maximum of the two sources (srcElem2 wins ties).
    # Registered as VmaxD / VmaxQ over allTypes; the snippet is reused later
    # in the file for the pairwise vpmax classes.
1607    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
1608    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
1609
1610    vminCode = '''
1611        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
1612    '''
    # vmin: element-wise minimum of the two sources (srcElem2 wins ties).
    # Registered as VminD / VminQ over allTypes; the snippet is reused later
    # in the file for the pairwise vpmin classes.
1613    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
1614    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
1615
1616    vaddCode = '''
1617        destElem = srcElem1 + srcElem2;
1618    '''
    # vadd: plain (wrapping) element-wise add, registered as NVaddD / NVaddQ
    # over unsignedTypes only.
1619    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
1620    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
1621
    # vpadd reuses the same add snippet but passes pairwise=True; how the
    # operand pairs are formed is decided inside threeEqualRegInst, which is
    # not visible here.
1622    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", unsignedTypes,
1623                      2, vaddCode, pairwise=True)
1624    threeEqualRegInst("vpadd", "NVpaddQ", "SimdAddOp", unsignedTypes,
1625                      4, vaddCode, pairwise=True)
1626    vaddlwCode = '''
1627        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
1628    '''
    # Shared by vaddl (long) and vaddw (wide): both sources are cast to
    # BigElement before the add so the sum is formed at the wider width.
    # Registered over smallTypes through the long and wide helper variants.
1629    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
1630    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
1631    vaddhnCode = '''
1632        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
1633                   (sizeof(Element) * 8);
1634    '''
    # vaddhn (add, keep high half, narrow): the sum is formed in BigElement
    # and shifted right by the bit width of Element, so only the bits above
    # the low Element-sized half reach destElem.
1635    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
1636    vraddhnCode = '''
1637        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
1638                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1639                   (sizeof(Element) * 8);
1640    '''
    # vraddhn: like vaddhnCode, but first adds 1 << (Element bits - 1), i.e.
    # half of the weight that is about to be shifted out, so the retained
    # high half is rounded rather than truncated.
1641    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
1642
1643    vsubCode = '''
1644        destElem = srcElem1 - srcElem2;
1645    '''
    # vsub: plain (wrapping) element-wise subtract, registered as
    # NVsubD / NVsubQ over unsignedTypes only.
1646    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
1647    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
1648    vsublwCode = '''
1649        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
1650    '''
    # Shared by vsubl (long) and vsubw (wide): both sources are cast to
    # BigElement before subtracting so the difference is formed at the wider
    # width. Registered over smallTypes.
1651    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
1652    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
1653
1654    vqaddUCode = '''
1655        destElem = srcElem1 + srcElem2;
1656        FPSCR fpscr = (FPSCR) FpscrQc;
1657        if (destElem < srcElem1 || destElem < srcElem2) {
1658            destElem = (Element)(-1);
1659            fpscr.qc = 1;
1660        }
1661        FpscrQc = fpscr;
1662    '''
    # Unsigned saturating add. A wrapped sum is detected by it being smaller
    # than either source; in that case the result is forced to all ones
    # ((Element)(-1)) and the qc bit is set. FpscrQc is copied into a local
    # FPSCR, updated, and always written back.
1663    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
1664    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
1665    vsubhnCode = '''
1666        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
1667                   (sizeof(Element) * 8);
1668    '''
    # vsubhn (subtract, keep high half, narrow): the difference is formed in
    # BigElement and shifted right by the bit width of Element, so only the
    # bits above the low Element-sized half reach destElem.
1669    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
1670    vrsubhnCode = '''
1671        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
1672                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
1673                   (sizeof(Element) * 8);
1674    '''
    # vrsubhn: like vsubhnCode, but adds the rounding constant
    # 1 << (Element bits - 1) to the difference before the high half is
    # extracted.
1675    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
1676
1677    vqaddSCode = '''
1678        destElem = srcElem1 + srcElem2;
1679        FPSCR fpscr = (FPSCR) FpscrQc;
1680        bool negDest = (destElem < 0);
1681        bool negSrc1 = (srcElem1 < 0);
1682        bool negSrc2 = (srcElem2 < 0);
1683        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
1684            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1685            if (negDest)
1686                destElem -= 1;
1687            fpscr.qc = 1;
1688        }
1689        FpscrQc = fpscr;
1690    '''
    # Signed saturating add. Overflow is flagged when both sources share a
    # sign and the wrapped sum has the opposite sign. The result is first set
    # to the sign-bit-only pattern (most negative value); if the wrapped sum
    # was negative (the sources were positive) 1 is subtracted, which relies
    # on wraparound to yield the most positive value. qc is set on saturation
    # and FpscrQc is always written back.
1691    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
1692    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
1693
1694    vqsubUCode = '''
1695        destElem = srcElem1 - srcElem2;
1696        FPSCR fpscr = (FPSCR) FpscrQc;
1697        if (destElem > srcElem1) {
1698            destElem = 0;
1699            fpscr.qc = 1;
1700        }
1701        FpscrQc = fpscr;
1702    '''
    # Unsigned saturating subtract. A wrapped difference is detected by it
    # being larger than srcElem1; the result is then clamped to 0 and qc is
    # set. FpscrQc is always written back.
1703    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
1704    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
1705
1706    vqsubSCode = '''
1707        destElem = srcElem1 - srcElem2;
1708        FPSCR fpscr = (FPSCR) FpscrQc;
1709        bool negDest = (destElem < 0);
1710        bool negSrc1 = (srcElem1 < 0);
1711        bool posSrc2 = (srcElem2 >= 0);
1712        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
1713            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
1714            if (negDest)
1715                destElem -= 1;
1716            fpscr.qc = 1;
1717        }
1718        FpscrQc = fpscr;
1719    '''
    # Signed saturating subtract. Overflow is flagged when the sources have
    # opposite signs (negSrc1 == posSrc2) and the wrapped difference does not
    # have srcElem1's sign. Clamping mirrors vqaddSCode: start from the
    # sign-bit-only pattern and subtract 1 when the wrapped result was
    # negative, giving the most positive value. qc is set on saturation.
1720    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
1721    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
1722
1723    vcgtCode = '''
1724        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
1725    '''
    # vcgt: compare greater-than; the result element is all ones when
    # srcElem1 > srcElem2 and 0 otherwise. Registered over allTypes.
1726    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
1727    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
1728
1729    vcgeCode = '''
1730        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
1731    '''
    # vcge: compare greater-than-or-equal; the result element is all ones
    # when srcElem1 >= srcElem2 and 0 otherwise. Registered over allTypes.
1732    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
1733    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
1734
1735    vceqCode = '''
1736        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
1737    '''
    # vceq: compare equal; the result element is all ones when the sources
    # match and 0 otherwise. Registered over unsignedTypes only.
1738    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
1739    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
1740
1741    vshlCode = '''
1742        int16_t shiftAmt = (int8_t)srcElem2;
1743        if (shiftAmt < 0) {
1744            shiftAmt = -shiftAmt;
1745            if (shiftAmt >= sizeof(Element) * 8) {
1746                shiftAmt = sizeof(Element) * 8 - 1;
1747                destElem = 0;
1748            } else {
1749                destElem = (srcElem1 >> shiftAmt);
1750            }
1751            // Make sure the right shift sign extended when it should.
1752            if (ltz(srcElem1) && !ltz(destElem)) {
1753                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1754                                             1 - shiftAmt));
1755            }
1756        } else {
1757            if (shiftAmt >= sizeof(Element) * 8) {
1758                destElem = 0;
1759            } else {
1760                destElem = srcElem1 << shiftAmt;
1761            }
1762        }
1763    '''
    # vshl (shift by register): the shift count is the low byte of srcElem2
    # interpreted as signed. A negative count shifts right by its magnitude,
    # a non-negative count shifts left; counts of at least the element width
    # give 0. For a negative source whose shifted value came out non-negative
    # the vacated upper bits are filled with ones; clamping shiftAmt to
    # width - 1 in the oversized case makes that fill mask all ones.
    # Registered as VshlD / VshlQ over allTypes.
1764    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
1765    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
1766
1767    vrshlCode = '''
1768        int16_t shiftAmt = (int8_t)srcElem2;
1769        if (shiftAmt < 0) {
1770            shiftAmt = -shiftAmt;
1771            Element rBit = 0;
1772            if (shiftAmt <= sizeof(Element) * 8)
1773                rBit = bits(srcElem1, shiftAmt - 1);
1774            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
1775                rBit = 1;
1776            if (shiftAmt >= sizeof(Element) * 8) {
1777                shiftAmt = sizeof(Element) * 8 - 1;
1778                destElem = 0;
1779            } else {
1780                destElem = (srcElem1 >> shiftAmt);
1781            }
1782            // Make sure the right shift sign extended when it should.
1783            if (ltz(srcElem1) && !ltz(destElem)) {
1784                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1785                                             1 - shiftAmt));
1786            }
1787            destElem += rBit;
1788        } else if (shiftAmt > 0) {
1789            if (shiftAmt >= sizeof(Element) * 8) {
1790                destElem = 0;
1791            } else {
1792                destElem = srcElem1 << shiftAmt;
1793            }
1794        } else {
1795            destElem = srcElem1;
1796        }
1797    '''
    # vrshl (rounding shift by register): same structure as vshlCode, plus a
    # rounding increment on right shifts. rBit is the last bit shifted out
    # (bit shiftAmt - 1 of the source) when the count does not exceed the
    # element width; for larger counts it is forced to 1 for negative sources
    # so that adding it to the all-ones sign fill yields 0. A zero count
    # copies the source unchanged. Registered as VrshlD / VrshlQ over
    # allTypes.
1798    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
1799    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
1800
1801    vqshlUCode = '''
1802        int16_t shiftAmt = (int8_t)srcElem2;
1803        FPSCR fpscr = (FPSCR) FpscrQc;
1804        if (shiftAmt < 0) {
1805            shiftAmt = -shiftAmt;
1806            if (shiftAmt >= sizeof(Element) * 8) {
1807                shiftAmt = sizeof(Element) * 8 - 1;
1808                destElem = 0;
1809            } else {
1810                destElem = (srcElem1 >> shiftAmt);
1811            }
1812        } else if (shiftAmt > 0) {
1813            if (shiftAmt >= sizeof(Element) * 8) {
1814                if (srcElem1 != 0) {
1815                    destElem = mask(sizeof(Element) * 8);
1816                    fpscr.qc = 1;
1817                } else {
1818                    destElem = 0;
1819                }
1820            } else {
1821                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1822                            sizeof(Element) * 8 - shiftAmt)) {
1823                    destElem = mask(sizeof(Element) * 8);
1824                    fpscr.qc = 1;
1825                } else {
1826                    destElem = srcElem1 << shiftAmt;
1827                }
1828            }
1829        } else {
1830            destElem = srcElem1;
1831        }
1832        FpscrQc = fpscr;
1833    '''
    # Unsigned saturating shift by register. Negative counts shift right
    # (0 once the count reaches the element width). Positive counts shift
    # left, but if any of the top shiftAmt bits of the source are set -- or
    # the count is at least the width and the source is non-zero -- the
    # result is clamped to mask(element width) and qc is set. A zero count
    # copies the source.
1834    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
1835    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
1836
1837    vqshlSCode = '''
1838        int16_t shiftAmt = (int8_t)srcElem2;
1839        FPSCR fpscr = (FPSCR) FpscrQc;
1840        if (shiftAmt < 0) {
1841            shiftAmt = -shiftAmt;
1842            if (shiftAmt >= sizeof(Element) * 8) {
1843                shiftAmt = sizeof(Element) * 8 - 1;
1844                destElem = 0;
1845            } else {
1846                destElem = (srcElem1 >> shiftAmt);
1847            }
1848            // Make sure the right shift sign extended when it should.
1849            if (srcElem1 < 0 && destElem >= 0) {
1850                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1851                                             1 - shiftAmt));
1852            }
1853        } else if (shiftAmt > 0) {
1854            bool sat = false;
1855            if (shiftAmt >= sizeof(Element) * 8) {
1856                if (srcElem1 != 0)
1857                    sat = true;
1858                else
1859                    destElem = 0;
1860            } else {
1861                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1862                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1863                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1864                    sat = true;
1865                } else {
1866                    destElem = srcElem1 << shiftAmt;
1867                }
1868            }
1869            if (sat) {
1870                fpscr.qc = 1;
1871                destElem = mask(sizeof(Element) * 8 - 1);
1872                if (srcElem1 < 0)
1873                    destElem = ~destElem;
1874            }
1875        } else {
1876            destElem = srcElem1;
1877        }
1878        FpscrQc = fpscr;
1879    '''
    # Signed saturating shift by register. Right shifts behave as in
    # vshlCode, including the manual sign fill. A left shift is lossless only
    # if the top shiftAmt + 1 bits of the source are all copies of the sign
    # bit (all ones for negative, all zeros otherwise); if not, or if the
    # count is at least the width with a non-zero source, the result
    # saturates to mask(width - 1) (most positive) or its complement (most
    # negative) and qc is set.
1880    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
1881    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
1882
1883    vqrshlUCode = '''
1884        int16_t shiftAmt = (int8_t)srcElem2;
1885        FPSCR fpscr = (FPSCR) FpscrQc;
1886        if (shiftAmt < 0) {
1887            shiftAmt = -shiftAmt;
1888            Element rBit = 0;
1889            if (shiftAmt <= sizeof(Element) * 8)
1890                rBit = bits(srcElem1, shiftAmt - 1);
1891            if (shiftAmt >= sizeof(Element) * 8) {
1892                shiftAmt = sizeof(Element) * 8 - 1;
1893                destElem = 0;
1894            } else {
1895                destElem = (srcElem1 >> shiftAmt);
1896            }
1897            destElem += rBit;
1898        } else {
1899            if (shiftAmt >= sizeof(Element) * 8) {
1900                if (srcElem1 != 0) {
1901                    destElem = mask(sizeof(Element) * 8);
1902                    fpscr.qc = 1;
1903                } else {
1904                    destElem = 0;
1905                }
1906            } else {
1907                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1908                            sizeof(Element) * 8 - shiftAmt)) {
1909                    destElem = mask(sizeof(Element) * 8);
1910                    fpscr.qc = 1;
1911                } else {
1912                    destElem = srcElem1 << shiftAmt;
1913                }
1914            }
1915        }
1916        FpscrQc = fpscr;
1917    '''
1918    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
1919    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
1920
1921    vqrshlSCode = '''
1922        int16_t shiftAmt = (int8_t)srcElem2;
1923        FPSCR fpscr = (FPSCR) FpscrQc;
1924        if (shiftAmt < 0) {
1925            shiftAmt = -shiftAmt;
1926            Element rBit = 0;
1927            if (shiftAmt <= sizeof(Element) * 8)
1928                rBit = bits(srcElem1, shiftAmt - 1);
1929            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
1930                rBit = 1;
1931            if (shiftAmt >= sizeof(Element) * 8) {
1932                shiftAmt = sizeof(Element) * 8 - 1;
1933                destElem = 0;
1934            } else {
1935                destElem = (srcElem1 >> shiftAmt);
1936            }
1937            // Make sure the right shift sign extended when it should.
1938            if (srcElem1 < 0 && destElem >= 0) {
1939                destElem |= -((Element)1 << (sizeof(Element) * 8 -
1940                                             1 - shiftAmt));
1941            }
1942            destElem += rBit;
1943        } else if (shiftAmt > 0) {
1944            bool sat = false;
1945            if (shiftAmt >= sizeof(Element) * 8) {
1946                if (srcElem1 != 0)
1947                    sat = true;
1948                else
1949                    destElem = 0;
1950            } else {
1951                if (bits(srcElem1, sizeof(Element) * 8 - 1,
1952                            sizeof(Element) * 8 - 1 - shiftAmt) !=
1953                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
1954                    sat = true;
1955                } else {
1956                    destElem = srcElem1 << shiftAmt;
1957                }
1958            }
1959            if (sat) {
1960                fpscr.qc = 1;
1961                destElem = mask(sizeof(Element) * 8 - 1);
1962                if (srcElem1 < 0)
1963                    destElem = ~destElem;
1964            }
1965        } else {
1966            destElem = srcElem1;
1967        }
1968        FpscrQc = fpscr;
1969    '''
    # Signed saturating rounding shift by register. The right-shift path is
    # the one from vrshlCode (sign fill plus the rBit rounding increment);
    # the left-shift path is the one from vqshlSCode (saturate to
    # mask(width - 1) or its complement and set qc when the top
    # shiftAmt + 1 bits are not all copies of the sign). A zero count copies
    # the source.
1970    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
1971    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
1972
1973    vabaCode = '''
1974        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1975                                            (srcElem2 - srcElem1);
1976    '''
    # vaba (absolute difference and accumulate): adds |srcElem1 - srcElem2|
    # (larger minus smaller) to the existing destination element. destElem is
    # an input, hence the trailing True (presumably a read-destination flag
    # -- confirm).
1977    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
1978    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
1979    vabalCode = '''
1980        destElem += (srcElem1 > srcElem2) ?
1981            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1982            ((BigElement)srcElem2 - (BigElement)srcElem1);
1983    '''
    # vabal: long form of vabaCode -- the sources are cast to BigElement
    # before subtracting, and the absolute difference is accumulated into the
    # destination. Registered over smallTypes with the trailing True.
1984    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
1985
1986    vabdCode = '''
1987        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
1988                                           (srcElem2 - srcElem1);
1989    '''
    # vabd (absolute difference): larger source minus smaller source, written
    # to the destination without accumulating. Registered over allTypes.
1990    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
1991    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
1992    vabdlCode = '''
1993        destElem = (srcElem1 > srcElem2) ?
1994            ((BigElement)srcElem1 - (BigElement)srcElem2) :
1995            ((BigElement)srcElem2 - (BigElement)srcElem1);
1996    '''
    # vabdl: long form of vabdCode -- the absolute difference is computed
    # after casting both sources to BigElement. Registered over smallTypes.
1997    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
1998
1999    vtstCode = '''
2000        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2001    '''
    # vtst (test bits): the result element is all ones when the two sources
    # have at least one set bit in common and 0 otherwise. Registered over
    # unsignedTypes.
2002    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2003    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2004
2005    vmulCode = '''
2006        destElem = srcElem1 * srcElem2;
2007    '''
    # vmul: element-wise multiply at the element's own width. Registered as
    # NVmulD / NVmulQ over allTypes.
2008    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2009    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2010    vmullCode = '''
2011        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2012    '''
    # vmull: long multiply -- both sources are cast to BigElement so the
    # product is formed at the wider width. Registered over smallTypes.
2013    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2014
2015    vmlaCode = '''
2016        destElem = destElem + srcElem1 * srcElem2;
2017    '''
    # vmla (multiply-accumulate): adds the product of the sources to the
    # existing destination element. destElem is an input, hence the trailing
    # True (presumably a read-destination flag -- confirm).
2018    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2019    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2020    vmlalCode = '''
2021        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2022    '''
    # vmlal: long multiply-accumulate -- the product is formed after casting
    # both sources to BigElement and added to the existing destination.
    # Registered over smallTypes with the trailing True.
2023    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2024
2025    vqdmlalCode = '''
2026        FPSCR fpscr = (FPSCR) FpscrQc;
2027        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2028        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2029        Element halfNeg = maxNeg / 2;
2030        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2031            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2032            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2033            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2034            fpscr.qc = 1;
2035        }
2036        bool negPreDest = ltz(destElem);
2037        destElem += midElem;
2038        bool negDest = ltz(destElem);
2039        bool negMid = ltz(midElem);
2040        if (negPreDest == negMid && negMid != negDest) {
2041            destElem = mask(sizeof(BigElement) * 8 - 1);
2042            if (negPreDest)
2043                destElem = ~destElem;
2044            fpscr.qc = 1;
2045        }
2046        FpscrQc = fpscr;
2047    '''
2048    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2049
2050    vqdmlslCode = '''
2051        FPSCR fpscr = (FPSCR) FpscrQc;
2052        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2053        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2054        Element halfNeg = maxNeg / 2;
2055        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2056            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2057            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2058            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2059            fpscr.qc = 1;
2060        }
2061        bool negPreDest = ltz(destElem);
2062        destElem -= midElem;
2063        bool negDest = ltz(destElem);
2064        bool posMid = ltz((BigElement)-midElem);
2065        if (negPreDest == posMid && posMid != negDest) {
2066            destElem = mask(sizeof(BigElement) * 8 - 1);
2067            if (negPreDest)
2068                destElem = ~destElem;
2069            fpscr.qc = 1;
2070        }
2071        FpscrQc = fpscr;
2072    '''
2073    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2074
2075    vqdmullCode = '''
2076        FPSCR fpscr = (FPSCR) FpscrQc;
2077        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2078        if (srcElem1 == srcElem2 &&
2079                srcElem1 == (Element)((Element)1 <<
2080                    (Element)(sizeof(Element) * 8 - 1))) {
2081            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2082            fpscr.qc = 1;
2083        }
2084        FpscrQc = fpscr;
2085    '''
    # vqdmull (saturating doubling multiply, long): the result is twice the
    # product of the sources. When both sources equal the sign-bit-only
    # Element value (the most negative value for signed types) the result is
    # replaced by the complement of that value shifted up by the Element
    # width -- the most positive wide value -- and qc is set.
2086    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2087
2088    vmlsCode = '''
2089        destElem = destElem - srcElem1 * srcElem2;
2090    '''
    # vmls (multiply-subtract): subtracts the product of the sources from the
    # existing destination element. destElem is an input, hence the trailing
    # True (presumably a read-destination flag -- confirm).
2091    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2092    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2093    vmlslCode = '''
2094        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2095    '''
    # vmlsl: long multiply-subtract -- the product is formed after casting
    # both sources to BigElement and subtracted from the existing
    # destination. Registered over smallTypes with the trailing True.
2096    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2097
2098    vmulpCode = '''
2099        destElem = 0;
2100        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2101            if (bits(srcElem2, j))
2102                destElem ^= srcElem1 << j;
2103        }
2104    '''
    # Carry-less multiply variant of vmul (classes NVmulpD / NVmulpQ): for
    # every set bit j of srcElem2, srcElem1 shifted left by j is XORed into
    # the result, so partial products combine without carries. The result
    # stays at the element width. Registered over unsignedTypes.
2105    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2106    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2107    vmullpCode = '''
2108        destElem = 0;
2109        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2110            if (bits(srcElem2, j))
2111                destElem ^= (BigElement)srcElem1 << j;
2112        }
2113    '''
    # Long form of the carry-less multiply in vmulpCode: srcElem1 is cast to
    # BigElement before each shift so the XOR-accumulated product keeps its
    # upper bits. Registered as Vmullp over smallUnsignedTypes.
2114    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2115
2116    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", allTypes, 2, vmaxCode, pairwise=True)
2117    threeEqualRegInst("vpmax", "VpmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode, pairwise=True)
2118
    # The vpmax classes above and the vpmin classes below are the pairwise
    # forms of vmax / vmin: they reuse vmaxCode / vminCode unchanged and only
    # add pairwise=True. How operand pairs are chosen is decided inside
    # threeEqualRegInst, which is not visible here.
2119    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", allTypes, 2, vminCode, pairwise=True)
2120    threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True)
2121
2122    vqdmulhCode = '''
2123        FPSCR fpscr = (FPSCR) FpscrQc;
2124        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2125                   (sizeof(Element) * 8);
2126        if (srcElem1 == srcElem2 &&
2127                srcElem1 == (Element)((Element)1 <<
2128                    (sizeof(Element) * 8 - 1))) {
2129            destElem = ~srcElem1;
2130            fpscr.qc = 1;
2131        }
2132        FpscrQc = fpscr;
2133    '''
    # vqdmulh (saturating doubling multiply, high half): the doubled 64-bit
    # product is shifted right by the Element width so only its upper part is
    # kept. When both sources equal the sign-bit-only value (most negative)
    # the result is replaced by its complement (most positive) and qc is set.
    # Registered over smallSignedTypes.
2134    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2135    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2136
2137    vqrdmulhCode = '''
2138        FPSCR fpscr = (FPSCR) FpscrQc;
2139        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2140                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2141                   (sizeof(Element) * 8);
2142        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2143        Element halfNeg = maxNeg / 2;
2144        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2145            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2146            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2147            if (destElem < 0) {
2148                destElem = mask(sizeof(Element) * 8 - 1);
2149            } else {
2150                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2151            }
2152            fpscr.qc = 1;
2153        }
2154        FpscrQc = fpscr;
2155    '''
2156    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2157            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2158    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2159            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2160
2161    vmaxfpCode = '''
2162        FPSCR fpscr = (FPSCR) FpscrExc;
2163        bool done;
2164        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2165        if (!done) {
2166            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS,
2167                               true, true, VfpRoundNearest);
2168        } else if (flushToZero(srcReg1, srcReg2)) {
2169            fpscr.idc = 1;
2170        }
2171        FpscrExc = fpscr;
2172    '''
    # Floating-point vmax. processNans gets the first chance to produce the
    # result and reports through `done` whether it did; only if it did not is
    # the maximum computed via binaryOp with fpMaxS and VfpRoundNearest. When
    # processNans did handle the operands, flushToZero is still called and a
    # true return sets the idc flag. FpscrExc is read into a local FPSCR and
    # written back at the end. NOTE(review): the exact contracts of
    # processNans / flushToZero / binaryOp are defined outside this chunk.
2173    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2174    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2175
2176    vminfpCode = '''
2177        FPSCR fpscr = (FPSCR) FpscrExc;
2178        bool done;
2179        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2180        if (!done) {
2181            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS,
2182                               true, true, VfpRoundNearest);
2183        } else if (flushToZero(srcReg1, srcReg2)) {
2184            fpscr.idc = 1;
2185        }
2186        FpscrExc = fpscr;
2187    '''
    # Floating-point vmin. Identical in structure to vmaxfpCode except that
    # binaryOp is given fpMinS: processNans may produce the result (signalled
    # through `done`), otherwise the minimum is computed; when processNans
    # handled the operands, a true flushToZero return sets idc. FpscrExc is
    # written back at the end.
2188    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2189    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2190
2191    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2192                        2, vmaxfpCode, pairwise=True)
2193    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2194                        4, vmaxfpCode, pairwise=True)
2195
    # The floating-point vpmax classes above and vpmin classes below reuse
    # vmaxfpCode / vminfpCode unchanged and only add pairwise=True; the
    # pairing itself is handled inside threeEqualRegInstFp, which is not
    # visible here.
2196    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2197                        2, vminfpCode, pairwise=True)
2198    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2199                        4, vminfpCode, pairwise=True)
2200
2201    vaddfpCode = '''
2202        FPSCR fpscr = (FPSCR) FpscrExc;
2203        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2204                           true, true, VfpRoundNearest);
2205        FpscrExc = fpscr;
2206    '''
    # Floating-point vadd: delegates to binaryOp with fpAddS and
    # VfpRoundNearest, passing a local copy of FpscrExc that is written back
    # afterwards so any flags binaryOp raises are kept.
2207    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2208    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2209
    # Floating-point vpadd reuses the same snippet with pairwise=True.
2210    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2211                        2, vaddfpCode, pairwise=True)
2212    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2213                        4, vaddfpCode, pairwise=True)
2214
2215    vsubfpCode = '''
2216        FPSCR fpscr = (FPSCR) FpscrExc;
2217        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2218                           true, true, VfpRoundNearest);
2219        FpscrExc = fpscr;
2220    '''
    # Floating-point vsub: delegates to binaryOp with fpSubS and
    # VfpRoundNearest; FpscrExc is copied in and written back around the
    # call.
2221    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2222    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2223
2224    vmulfpCode = '''
2225        FPSCR fpscr = (FPSCR) FpscrExc;
2226        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2227                           true, true, VfpRoundNearest);
2228        FpscrExc = fpscr;
2229    '''
    # Floating-point vmul: delegates to binaryOp with fpMulS and
    # VfpRoundNearest; FpscrExc is copied in and written back around the
    # call.
2230    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2231    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2232
2233    vmlafpCode = '''
2234        FPSCR fpscr = (FPSCR) FpscrExc;
2235        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2236                             true, true, VfpRoundNearest);
2237        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2238                           true, true, VfpRoundNearest);
2239        FpscrExc = fpscr;
2240    '''
    # Floating-point vmla: two separate binaryOp steps -- the product of the
    # sources is computed into `mid`, then mid + destReg becomes the new
    # destination. Both steps share the same local FPSCR, which is written
    # back to FpscrExc. destReg is an input, hence the trailing True
    # (presumably a read-destination flag -- confirm).
2241    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2242    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2243
2244    vmlsfpCode = '''
2245        FPSCR fpscr = (FPSCR) FpscrExc;
2246        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2247                             true, true, VfpRoundNearest);
2248        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2249                           true, true, VfpRoundNearest);
2250        FpscrExc = fpscr;
2251    '''
    # Floating-point vmls: the product of the sources is computed into `mid`
    # and then destReg - mid (note the operand order) becomes the new
    # destination. Both binaryOp steps share the local FPSCR, which is
    # written back to FpscrExc. destReg is an input, hence the trailing True.
2252    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2253    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2254
2255    vcgtfpCode = '''
2256        FPSCR fpscr = (FPSCR) FpscrExc;
2257        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2258                             true, true, VfpRoundNearest);
2259        destReg = (res == 0) ? -1 : 0;
2260        if (res == 2.0)
2261            fpscr.ioc = 1;
2262        FpscrExc = fpscr;
2263    '''
    # Floating-point vcgt. The comparison itself lives in vcgtFunc (defined
    # outside this chunk) and is run through binaryOp. This snippet maps its
    # result: destReg becomes -1 when res == 0 and 0 otherwise, and a result
    # of 2.0 additionally sets the ioc flag. toInt = True presumably marks
    # the destination as an integer mask rather than a float -- confirm in
    # threeEqualRegInstFp.
2264    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2265            2, vcgtfpCode, toInt = True)
2266    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2267            4, vcgtfpCode, toInt = True)
2268
# Floating-point register/register compare snippet built on vcgeFunc, which
# is run through binaryOp on srcReg1 and srcReg2. A result of 0 writes -1 to
# destReg and any other result writes 0; a result of 2.0 additionally sets
# fpscr.ioc. The meaning of those return codes is defined by vcgeFunc, which
# is not visible here.
# Registered as "vcge" classes VcgeDFp / VcgeQFp with toInt = True.
2269    vcgefpCode = '''
2270        FPSCR fpscr = (FPSCR) FpscrExc;
2271        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2272                             true, true, VfpRoundNearest);
2273        destReg = (res == 0) ? -1 : 0;
2274        if (res == 2.0)
2275            fpscr.ioc = 1;
2276        FpscrExc = fpscr;
2277    '''
2278    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2279            2, vcgefpCode, toInt = True)
2280    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2281            4, vcgefpCode, toInt = True)
2282
# Floating-point compare snippet built on vacgtFunc, which is run through
# binaryOp on srcReg1 and srcReg2. A result of 0 writes -1 to destReg and any
# other result writes 0; a result of 2.0 additionally sets fpscr.ioc. The
# comparison itself and its return codes live in vacgtFunc (not visible here).
# Registered as "vacgt" classes VacgtDFp / VacgtQFp with toInt = True.
2283    vacgtfpCode = '''
2284        FPSCR fpscr = (FPSCR) FpscrExc;
2285        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2286                             true, true, VfpRoundNearest);
2287        destReg = (res == 0) ? -1 : 0;
2288        if (res == 2.0)
2289            fpscr.ioc = 1;
2290        FpscrExc = fpscr;
2291    '''
2292    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2293            2, vacgtfpCode, toInt = True)
2294    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2295            4, vacgtfpCode, toInt = True)
2296
# Floating-point compare snippet built on vacgeFunc, which is run through
# binaryOp on srcReg1 and srcReg2. A result of 0 writes -1 to destReg and any
# other result writes 0; a result of 2.0 additionally sets fpscr.ioc. The
# comparison itself and its return codes live in vacgeFunc (not visible here).
# Registered as "vacge" classes VacgeDFp / VacgeQFp with toInt = True.
2297    vacgefpCode = '''
2298        FPSCR fpscr = (FPSCR) FpscrExc;
2299        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2300                             true, true, VfpRoundNearest);
2301        destReg = (res == 0) ? -1 : 0;
2302        if (res == 2.0)
2303            fpscr.ioc = 1;
2304        FpscrExc = fpscr;
2305    '''
2306    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2307            2, vacgefpCode, toInt = True)
2308    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2309            4, vacgefpCode, toInt = True)
2310
# Floating-point register/register compare snippet built on vceqFunc, which
# is run through binaryOp on srcReg1 and srcReg2. A result of 0 writes -1 to
# destReg and any other result writes 0; a result of 2.0 additionally sets
# fpscr.ioc. The meaning of those return codes is defined by vceqFunc, which
# is not visible here.
# Registered as "vceq" classes VceqDFp / VceqQFp with toInt = True.
2311    vceqfpCode = '''
2312        FPSCR fpscr = (FPSCR) FpscrExc;
2313        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2314                             true, true, VfpRoundNearest);
2315        destReg = (res == 0) ? -1 : 0;
2316        if (res == 2.0)
2317            fpscr.ioc = 1;
2318        FpscrExc = fpscr;
2319    '''
2320    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2321            2, vceqfpCode, toInt = True)
2322    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2323            4, vceqfpCode, toInt = True)
2324
# Snippet that sets destReg to binaryOp(srcReg1, srcReg2, fpRecpsS) using
# VfpRoundNearest; the local FPSCR copy of FpscrExc is stored back afterwards.
# The arithmetic itself is implemented by fpRecpsS (not visible here).
# Registered as "vrecps" classes VrecpsDFp / VrecpsQFp for ("float",).
2325    vrecpsCode = '''
2326        FPSCR fpscr = (FPSCR) FpscrExc;
2327        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2328                           true, true, VfpRoundNearest);
2329        FpscrExc = fpscr;
2330    '''
2331    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2332    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2333
# Snippet that sets destReg to binaryOp(srcReg1, srcReg2, fpRSqrtsS) using
# VfpRoundNearest; the local FPSCR copy of FpscrExc is stored back afterwards.
# The arithmetic itself is implemented by fpRSqrtsS (not visible here).
# Registered as "vrsqrts" classes VrsqrtsDFp / VrsqrtsQFp for ("float",).
2334    vrsqrtsCode = '''
2335        FPSCR fpscr = (FPSCR) FpscrExc;
2336        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2337                           true, true, VfpRoundNearest);
2338        FpscrExc = fpscr;
2339    '''
2340    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2341    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2342
# Floating-point absolute-difference snippet: mid = srcReg1 - srcReg2 via
# binaryOp/fpSubS with VfpRoundNearest, then destReg = fabs(mid). The local
# FPSCR copy of FpscrExc is stored back at the end.
# Registered as "vabd" classes VabdDFp / VabdQFp for ("float",).
2343    vabdfpCode = '''
2344        FPSCR fpscr = (FPSCR) FpscrExc;
2345        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2346                             true, true, VfpRoundNearest);
2347        destReg = fabs(mid);
2348        FpscrExc = fpscr;
2349    '''
2350    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2351    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2352
# Additional "vmla"/"vmlal" registrations that reuse existing snippets:
# VmlasD/VmlasQ use vmlaCode (unsignedTypes), VmlasDFp/VmlasQFp use
# vmlafpCode, and Vmlals uses vmlalCode (smallTypes). vmlaCode and vmlalCode
# are defined elsewhere in this file.
# NOTE(review): the extra "s" in the class names presumably denotes the
# multiply-by-scalar encodings -- confirm against the decoder.
2353    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2354    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2355    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2356    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2357    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2358
# Additional "vmls"/"vmlsl" registrations that reuse existing snippets:
# VmlssD/VmlssQ use vmlsCode (allTypes), VmlssDFp/VmlssQFp use vmlsfpCode,
# and Vmlsls uses vmlslCode (smallTypes). vmlsCode and vmlslCode are defined
# elsewhere in this file.
# NOTE(review): the extra "s" in the class names presumably denotes the
# multiply-by-scalar encodings -- confirm against the decoder.
2359    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2360    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2361    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2362    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2363    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2364
# Additional "vmul"/"vmull" registrations that reuse existing snippets:
# VmulsD/VmulsQ use vmulCode (allTypes), VmulsDFp/VmulsQFp use vmulfpCode,
# and Vmulls uses vmullCode (smallTypes). All three snippets are defined
# elsewhere in this file.
# NOTE(review): the extra "s" in the class names presumably denotes the
# multiply-by-scalar encodings -- confirm against the decoder.
2365    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2366    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2367    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2368    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2369    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2370
# Registrations for the saturating-doubling multiply family, all reusing
# snippets defined elsewhere in this file (vqdmullCode, vqdmlalCode,
# vqdmlslCode, vqdmulhCode, vqrdmulhCode). The long forms go through
# twoRegLongInst with smallTypes; the vqdmulh/vqrdmulh forms go through
# twoEqualRegInst with smallSignedTypes in 2 and 4 variants.
# NOTE(review): the trailing "s" in the class names presumably denotes the
# by-scalar encodings -- confirm against the decoder.
2371    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2372    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2373    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2374    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2375    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2376    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2377            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2378    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2379            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2380
# Shift-right-by-immediate snippet. When imm is at least the bit width of
# srcElem1 the result is -1 if ltz(srcElem1) holds and 0 otherwise; for
# smaller shifts it is simply srcElem1 >> imm.
# Registered as "vshr" classes NVshrD / NVshrQ for allTypes.
2381    vshrCode = '''
2382        if (imm >= sizeof(srcElem1) * 8) {
2383            if (ltz(srcElem1))
2384                destElem = -1;
2385            else
2386                destElem = 0;
2387        } else {
2388            destElem = srcElem1 >> imm;
2389        }
2390    '''
2391    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2392    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2393
# Shift-right-and-accumulate snippet: computes the shifted value in mid and
# adds it to destElem.
#  - imm >= bit width of srcElem1: mid is -1 when ltz(srcElem1), else 0.
#  - otherwise mid = srcElem1 >> imm; if ltz(srcElem1) holds but ltz(mid)
#    does not, the bit at position (width - 1 - imm) of mid is propagated
#    into all higher bits (manual sign extension).
# Registered as "vsra" classes NVsraD / NVsraQ for allTypes; the trailing
# True presumably marks destElem as an input -- confirm in twoRegShiftInst.
# NOTE(review): "Element mid;;" carries a stray extra semicolon (an empty
# statement); it is harmless and left untouched here.
2394    vsraCode = '''
2395        Element mid;;
2396        if (imm >= sizeof(srcElem1) * 8) {
2397            mid = ltz(srcElem1) ? -1 : 0;
2398        } else {
2399            mid = srcElem1 >> imm;
2400            if (ltz(srcElem1) && !ltz(mid)) {
2401                mid |= -(mid & ((Element)1 <<
2402                            (sizeof(Element) * 8 - 1 - imm)));
2403            }
2404        }
2405        destElem += mid;
2406    '''
2407    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2408    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2409
# Rounding shift-right-by-immediate snippet.
#  - imm > bit width of srcElem1: result is 0.
#  - imm != 0: the bit just below the shift point (bits(srcElem1, imm - 1))
#    is added to srcElem1 shifted right by imm; the shift is done as
#    (imm - 1) followed by 1.
#  - imm == 0: plain copy of srcElem1.
# Registered as "vrshr" classes NVrshrD / NVrshrQ for allTypes.
2410    vrshrCode = '''
2411        if (imm > sizeof(srcElem1) * 8) {
2412            destElem = 0;
2413        } else if (imm) {
2414            Element rBit = bits(srcElem1, imm - 1);
2415            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2416        } else {
2417            destElem = srcElem1;
2418        }
2419    '''
2420    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2421    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2422
# Rounding shift-right-and-accumulate snippet; every branch adds into
# destElem.
#  - imm > bit width of srcElem1: adds 0.
#  - imm != 0: adds (srcElem1 >> imm) plus the rounding bit
#    bits(srcElem1, imm - 1); the shift is done as (imm - 1) followed by 1.
#  - imm == 0: adds srcElem1 unchanged.
# Registered as "vrsra" classes NVrsraD / NVrsraQ for allTypes; the trailing
# True presumably marks destElem as an input -- confirm in twoRegShiftInst.
2423    vrsraCode = '''
2424        if (imm > sizeof(srcElem1) * 8) {
2425            destElem += 0;
2426        } else if (imm) {
2427            Element rBit = bits(srcElem1, imm - 1);
2428            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2429        } else {
2430            destElem += srcElem1;
2431        }
2432    '''
2433    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2434    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2435
# Shift-right-and-insert snippet. For imm below the element width the result
# is (srcElem1 >> imm) OR-ed with the bits of destElem that lie above the low
# (width - imm) bits; for imm >= width destElem is assigned to itself.
# Registered as "vsri" classes NVsriD / NVsriQ for unsignedTypes; the
# trailing True presumably marks destElem as an input -- confirm in
# twoRegShiftInst.
2436    vsriCode = '''
2437        if (imm >= sizeof(Element) * 8)
2438            destElem = destElem;
2439        else
2440            destElem = (srcElem1 >> imm) |
2441                (destElem & ~mask(sizeof(Element) * 8 - imm));
2442    '''
2443    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2444    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2445
# Shift-left-by-immediate snippet. For imm below the element width the result
# is srcElem1 << imm; for imm >= width the shift is performed as (width - 1)
# followed by 1, which avoids a single shift by the full width.
# Registered as "vshl" classes NVshlD / NVshlQ for unsignedTypes.
2446    vshlCode = '''
2447        if (imm >= sizeof(Element) * 8)
2448            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2449        else
2450            destElem = srcElem1 << imm;
2451    '''
2452    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2453    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2454
# Shift-left-and-insert snippet. For imm below the element width the result
# is (srcElem1 << imm) OR-ed with the low imm bits of destElem; for
# imm >= width destElem is assigned to itself.
# Registered as "vsli" classes NVsliD / NVsliQ for unsignedTypes; the
# trailing True presumably marks destElem as an input -- confirm in
# twoRegShiftInst.
2455    vsliCode = '''
2456        if (imm >= sizeof(Element) * 8)
2457            destElem = destElem;
2458        else
2459            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2460    '''
2461    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2462    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2463
# Saturating shift-left-by-immediate snippet, registered for signedTypes.
#  - imm >= element width: a non-zero source saturates to the sign-bit-only
#    pattern (complemented when srcElem1 > 0) and sets fpscr.qc; a zero
#    source gives 0.
#  - 0 < imm < width: destElem = srcElem1 << imm, unless the top (imm + 1)
#    bits of the source are neither all zero nor all one, in which case the
#    result saturates as above and fpscr.qc is set.
#  - imm == 0: plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered as "vqshl" classes NVqshlD / NVqshlQ.
2464    vqshlCode = '''
2465        FPSCR fpscr = (FPSCR) FpscrQc;
2466        if (imm >= sizeof(Element) * 8) {
2467            if (srcElem1 != 0) {
2468                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2469                if (srcElem1 > 0)
2470                    destElem = ~destElem;
2471                fpscr.qc = 1;
2472            } else {
2473                destElem = 0;
2474            }
2475        } else if (imm) {
2476            destElem = (srcElem1 << imm);
2477            uint64_t topBits = bits((uint64_t)srcElem1,
2478                                    sizeof(Element) * 8 - 1,
2479                                    sizeof(Element) * 8 - 1 - imm);
2480            if (topBits != 0 && topBits != mask(imm + 1)) {
2481                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2482                if (srcElem1 > 0)
2483                    destElem = ~destElem;
2484                fpscr.qc = 1;
2485            }
2486        } else {
2487            destElem = srcElem1;
2488        }
2489        FpscrQc = fpscr;
2490    '''
2491    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2492    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2493
# Saturating shift-left-by-immediate snippet, registered for unsignedTypes.
#  - imm >= element width: a non-zero source saturates to all ones
#    (mask(width)) and sets fpscr.qc; a zero source gives 0.
#  - 0 < imm < width: destElem = srcElem1 << imm, unless any of the top imm
#    bits of the source is set, in which case the result saturates to all
#    ones and fpscr.qc is set.
#  - imm == 0: plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered as "vqshlu" classes NVqshluD / NVqshluQ.
2494    vqshluCode = '''
2495        FPSCR fpscr = (FPSCR) FpscrQc;
2496        if (imm >= sizeof(Element) * 8) {
2497            if (srcElem1 != 0) {
2498                destElem = mask(sizeof(Element) * 8);
2499                fpscr.qc = 1;
2500            } else {
2501                destElem = 0;
2502            }
2503        } else if (imm) {
2504            destElem = (srcElem1 << imm);
2505            uint64_t topBits = bits((uint64_t)srcElem1,
2506                                    sizeof(Element) * 8 - 1,
2507                                    sizeof(Element) * 8 - imm);
2508            if (topBits != 0) {
2509                destElem = mask(sizeof(Element) * 8);
2510                fpscr.qc = 1;
2511            }
2512        } else {
2513            destElem = srcElem1;
2514        }
2515        FpscrQc = fpscr;
2516    '''
2517    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
2518    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
2519
# Saturating shift-left-by-immediate snippet, registered for signedTypes,
# that clamps negative sources to 0 and overflowing positive sources to all
# ones (mask(width)); every clamp sets fpscr.qc.
#  - imm >= element width: negative -> 0, positive -> all ones, zero -> 0
#    (no qc for zero).
#  - 0 < imm < width: destElem = srcElem1 << imm, overridden with 0 for a
#    negative source or with all ones when any of the top imm source bits is
#    set.
#  - imm == 0: negative -> 0 with qc, otherwise a plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered as "vqshlus" classes NVqshlusD / NVqshlusQ.
2520    vqshlusCode = '''
2521        FPSCR fpscr = (FPSCR) FpscrQc;
2522        if (imm >= sizeof(Element) * 8) {
2523            if (srcElem1 < 0) {
2524                destElem = 0;
2525                fpscr.qc = 1;
2526            } else if (srcElem1 > 0) {
2527                destElem = mask(sizeof(Element) * 8);
2528                fpscr.qc = 1;
2529            } else {
2530                destElem = 0;
2531            }
2532        } else if (imm) {
2533            destElem = (srcElem1 << imm);
2534            uint64_t topBits = bits((uint64_t)srcElem1,
2535                                    sizeof(Element) * 8 - 1,
2536                                    sizeof(Element) * 8 - imm);
2537            if (srcElem1 < 0) {
2538                destElem = 0;
2539                fpscr.qc = 1;
2540            } else if (topBits != 0) {
2541                destElem = mask(sizeof(Element) * 8);
2542                fpscr.qc = 1;
2543            }
2544        } else {
2545            if (srcElem1 < 0) {
2546                fpscr.qc = 1;
2547                destElem = 0;
2548            } else {
2549                destElem = srcElem1;
2550            }
2551        }
2552        FpscrQc = fpscr;
2553    '''
2554    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
2555    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
2556
# Narrowing shift-right snippet: destElem is 0 when imm is at least the bit
# width of srcElem1, otherwise srcElem1 >> imm.
# Registered once as "vshrn" class NVshrn via twoRegNarrowShiftInst for
# smallUnsignedTypes.
2557    vshrnCode = '''
2558        if (imm >= sizeof(srcElem1) * 8) {
2559            destElem = 0;
2560        } else {
2561            destElem = srcElem1 >> imm;
2562        }
2563    '''
2564    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
2565
# Rounding narrowing shift-right snippet.
#  - imm > bit width of srcElem1: result is 0.
#  - imm != 0: (srcElem1 >> imm) plus the rounding bit
#    bits(srcElem1, imm - 1); the shift is done as (imm - 1) followed by 1.
#  - imm == 0: plain copy of srcElem1.
# Registered once as "vrshrn" class NVrshrn via twoRegNarrowShiftInst for
# smallUnsignedTypes.
2566    vrshrnCode = '''
2567        if (imm > sizeof(srcElem1) * 8) {
2568            destElem = 0;
2569        } else if (imm) {
2570            Element rBit = bits(srcElem1, imm - 1);
2571            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2572        } else {
2573            destElem = srcElem1;
2574        }
2575    '''
2576    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
2577
# Saturating narrowing shift-right snippet, registered for smallSignedTypes.
#  - imm > bit width of srcElem1: result is 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - imm != 0: the source is shifted right by imm (as (imm - 1) then 1) into
#    a BigElement, the bit at position sizeof(BigElement) * 8 - 1 - imm is
#    propagated into the higher bits, and if the value does not survive a
#    conversion to Element it saturates to mask(width - 1) (complemented for
#    a negative source) with fpscr.qc set.
#  - imm == 0: plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered once as "vqshrn" class NVqshrn via twoRegNarrowShiftInst.
2578    vqshrnCode = '''
2579        FPSCR fpscr = (FPSCR) FpscrQc;
2580        if (imm > sizeof(srcElem1) * 8) {
2581            if (srcElem1 != 0 && srcElem1 != -1)
2582                fpscr.qc = 1;
2583            destElem = 0;
2584        } else if (imm) {
2585            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2586            mid |= -(mid & ((BigElement)1 <<
2587                        (sizeof(BigElement) * 8 - 1 - imm)));
2588            if (mid != (Element)mid) {
2589                destElem = mask(sizeof(Element) * 8 - 1);
2590                if (srcElem1 < 0)
2591                    destElem = ~destElem;
2592                fpscr.qc = 1;
2593            } else {
2594                destElem = mid;
2595            }
2596        } else {
2597            destElem = srcElem1;
2598        }
2599        FpscrQc = fpscr;
2600    '''
2601    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
2602
# Saturating narrowing shift-right snippet, registered for
# smallUnsignedTypes.
#  - imm > bit width of srcElem1: result is 0; fpscr.qc is set for a
#    non-zero source.
#  - imm != 0: the source is shifted right by imm (as (imm - 1) then 1) into
#    a BigElement; if the value does not survive a conversion to Element it
#    saturates to all ones (mask(width)) with fpscr.qc set.
#  - imm == 0: plain copy, with no saturation check.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered under the mnemonic "vqshrun" as class NVqshrun via
# twoRegNarrowShiftInst.
2603    vqshrunCode = '''
2604        FPSCR fpscr = (FPSCR) FpscrQc;
2605        if (imm > sizeof(srcElem1) * 8) {
2606            if (srcElem1 != 0)
2607                fpscr.qc = 1;
2608            destElem = 0;
2609        } else if (imm) {
2610            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2611            if (mid != (Element)mid) {
2612                destElem = mask(sizeof(Element) * 8);
2613                fpscr.qc = 1;
2614            } else {
2615                destElem = mid;
2616            }
2617        } else {
2618            destElem = srcElem1;
2619        }
2620        FpscrQc = fpscr;
2621    '''
2622    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
2623                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
2624
# Saturating narrowing shift-right snippet, registered for smallSignedTypes,
# that clamps to an unsigned range.
#  - imm > bit width of srcElem1: result is 0; fpscr.qc is set for a
#    non-zero source.
#  - imm != 0: the source is shifted right by imm (as (imm - 1) then 1) into
#    a BigElement; if any bit at or above position sizeof(Element) * 8 is
#    set, the result becomes 0 for a negative source or all ones
#    (mask(width)) otherwise, and fpscr.qc is set.
#  - imm == 0: plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered under the mnemonic "vqshrun" as class NVqshruns via
# twoRegNarrowShiftInst.
2625    vqshrunsCode = '''
2626        FPSCR fpscr = (FPSCR) FpscrQc;
2627        if (imm > sizeof(srcElem1) * 8) {
2628            if (srcElem1 != 0)
2629                fpscr.qc = 1;
2630            destElem = 0;
2631        } else if (imm) {
2632            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
2633            if (bits(mid, sizeof(BigElement) * 8 - 1,
2634                          sizeof(Element) * 8) != 0) {
2635                if (srcElem1 < 0) {
2636                    destElem = 0;
2637                } else {
2638                    destElem = mask(sizeof(Element) * 8);
2639                }
2640                fpscr.qc = 1;
2641            } else {
2642                destElem = mid;
2643            }
2644        } else {
2645            destElem = srcElem1;
2646        }
2647        FpscrQc = fpscr;
2648    '''
2649    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
2650                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
2651
# Saturating rounding narrowing shift-right snippet, registered for
# smallSignedTypes.
#  - imm > bit width of srcElem1: result is 0; fpscr.qc is set unless the
#    source is 0 or -1.
#  - imm != 0: the source is shifted right by (imm - 1), the low bit is
#    saved as the rounding bit, the value is shifted once more, the bit at
#    position sizeof(BigElement) * 8 - 1 - imm is propagated into the higher
#    bits, and the rounding bit is added. If the value does not survive a
#    conversion to Element it saturates to mask(width - 1) (complemented for
#    a negative source) with fpscr.qc set.
#  - imm == 0: the same saturation check is applied directly to srcElem1.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered once as "vqrshrn" class NVqrshrn via twoRegNarrowShiftInst.
2652    vqrshrnCode = '''
2653        FPSCR fpscr = (FPSCR) FpscrQc;
2654        if (imm > sizeof(srcElem1) * 8) {
2655            if (srcElem1 != 0 && srcElem1 != -1)
2656                fpscr.qc = 1;
2657            destElem = 0;
2658        } else if (imm) {
2659            BigElement mid = (srcElem1 >> (imm - 1));
2660            uint64_t rBit = mid & 0x1;
2661            mid >>= 1;
2662            mid |= -(mid & ((BigElement)1 <<
2663                        (sizeof(BigElement) * 8 - 1 - imm)));
2664            mid += rBit;
2665            if (mid != (Element)mid) {
2666                destElem = mask(sizeof(Element) * 8 - 1);
2667                if (srcElem1 < 0)
2668                    destElem = ~destElem;
2669                fpscr.qc = 1;
2670            } else {
2671                destElem = mid;
2672            }
2673        } else {
2674            if (srcElem1 != (Element)srcElem1) {
2675                destElem = mask(sizeof(Element) * 8 - 1);
2676                if (srcElem1 < 0)
2677                    destElem = ~destElem;
2678                fpscr.qc = 1;
2679            } else {
2680                destElem = srcElem1;
2681            }
2682        }
2683        FpscrQc = fpscr;
2684    '''
2685    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
2686                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
2687
2688    vqrshrunCode = '''
2689        FPSCR fpscr = (FPSCR) FpscrQc;
2690        if (imm > sizeof(srcElem1) * 8) {
2691            if (srcElem1 != 0)
2692                fpscr.qc = 1;
2693            destElem = 0;
2694        } else if (imm) {
2695            BigElement mid = (srcElem1 >> (imm - 1));
2696            uint64_t rBit = mid & 0x1;
2697            mid >>= 1;
2698            mid += rBit;
2699            if (mid != (Element)mid) {
2700                destElem = mask(sizeof(Element) * 8);
2701                fpscr.qc = 1;
2702            } else {
2703                destElem = mid;
2704            }
2705        } else {
2706            if (srcElem1 != (Element)srcElem1) {
2707                destElem = mask(sizeof(Element) * 8 - 1);
2708                fpscr.qc = 1;
2709            } else {
2710                destElem = srcElem1;
2711            }
2712        }
2713        FpscrQc = fpscr;
2714    '''
2715    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
2716                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
2717
# Saturating rounding narrowing shift-right snippet, registered for
# smallSignedTypes, that clamps to an unsigned range.
#  - imm > bit width of srcElem1: result is 0; fpscr.qc is set for a
#    non-zero source.
#  - imm != 0: the source is shifted right by (imm - 1), the low bit is
#    saved as the rounding bit, the value is shifted once more, the bit at
#    position sizeof(BigElement) * 8 - 1 - imm is propagated into the higher
#    bits, and the rounding bit is added. If any bit at or above position
#    sizeof(Element) * 8 is set, the result becomes 0 for a negative source
#    or all ones (mask(width)) otherwise, and fpscr.qc is set.
#  - imm == 0: a negative source gives 0 with fpscr.qc set, otherwise a
#    plain copy.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered under the mnemonic "vqrshrun" as class NVqrshruns via
# twoRegNarrowShiftInst.
2718    vqrshrunsCode = '''
2719        FPSCR fpscr = (FPSCR) FpscrQc;
2720        if (imm > sizeof(srcElem1) * 8) {
2721            if (srcElem1 != 0)
2722                fpscr.qc = 1;
2723            destElem = 0;
2724        } else if (imm) {
2725            BigElement mid = (srcElem1 >> (imm - 1));
2726            uint64_t rBit = mid & 0x1;
2727            mid >>= 1;
2728            mid |= -(mid & ((BigElement)1 <<
2729                            (sizeof(BigElement) * 8 - 1 - imm)));
2730            mid += rBit;
2731            if (bits(mid, sizeof(BigElement) * 8 - 1,
2732                          sizeof(Element) * 8) != 0) {
2733                if (srcElem1 < 0) {
2734                    destElem = 0;
2735                } else {
2736                    destElem = mask(sizeof(Element) * 8);
2737                }
2738                fpscr.qc = 1;
2739            } else {
2740                destElem = mid;
2741            }
2742        } else {
2743            if (srcElem1 < 0) {
2744                fpscr.qc = 1;
2745                destElem = 0;
2746            } else {
2747                destElem = srcElem1;
2748            }
2749        }
2750        FpscrQc = fpscr;
2751    '''
2752    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
2753                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
2754
# Widening shift-left snippet: destElem is 0 when imm is at least the bit
# width of destElem, otherwise srcElem1 converted to BigElement and shifted
# left by imm.
# Registered once as "vshll" class NVshll via twoRegLongShiftInst for
# smallTypes.
2755    vshllCode = '''
2756        if (imm >= sizeof(destElem) * 8) {
2757            destElem = 0;
2758        } else {
2759            destElem = (BigElement)srcElem1 << imm;
2760        }
2761    '''
2762    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
2763
# Plain element copy (destElem = srcElem1) registered through the long-shift
# helper, so any widening comes from the element types that helper assigns.
# Registered once as "vmovl" class NVmovl for smallTypes.
2764    vmovlCode = '''
2765        destElem = srcElem1;
2766    '''
2767    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
2768
# Float-to-fixed-point conversion snippet. Sets fpscr.idc when
# flushToZero(srcElem1) returns true, then converts with
# vfpFpSToFixed(srcElem1, false, false, imm) between prepFpState and
# finishVfp; the FPSCR copy of FpscrExc is stored back at the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers that pin the conversion between prepFpState and
# finishVfp, and the first "false" presumably selects the unsigned variant
# (the name says "ufx") -- confirm against vfpFpSToFixed.
# Registered as "vcvt" classes NVcvt2ufxD / NVcvt2ufxQ with toInt = True.
2769    vcvt2ufxCode = '''
2770        FPSCR fpscr = (FPSCR) FpscrExc;
2771        if (flushToZero(srcElem1))
2772            fpscr.idc = 1;
2773        VfpSavedState state = prepFpState(VfpRoundNearest);
2774        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2775        destReg = vfpFpSToFixed(srcElem1, false, false, imm);
2776        __asm__ __volatile__("" :: "m" (destReg));
2777        finishVfp(fpscr, state, true);
2778        FpscrExc = fpscr;
2779    '''
2780    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
2781            2, vcvt2ufxCode, toInt = True)
2782    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
2783            4, vcvt2ufxCode, toInt = True)
2784
# Float-to-fixed-point conversion snippet. Sets fpscr.idc when
# flushToZero(srcElem1) returns true, then converts with
# vfpFpSToFixed(srcElem1, true, false, imm) between prepFpState and
# finishVfp; the FPSCR copy of FpscrExc is stored back at the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers that pin the conversion between prepFpState and
# finishVfp, and the "true" argument presumably selects the signed variant
# (the name says "sfx") -- confirm against vfpFpSToFixed.
# Registered as "vcvt" classes NVcvt2sfxD / NVcvt2sfxQ with toInt = True.
2785    vcvt2sfxCode = '''
2786        FPSCR fpscr = (FPSCR) FpscrExc;
2787        if (flushToZero(srcElem1))
2788            fpscr.idc = 1;
2789        VfpSavedState state = prepFpState(VfpRoundNearest);
2790        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
2791        destReg = vfpFpSToFixed(srcElem1, true, false, imm);
2792        __asm__ __volatile__("" :: "m" (destReg));
2793        finishVfp(fpscr, state, true);
2794        FpscrExc = fpscr;
2795    '''
2796    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
2797            2, vcvt2sfxCode, toInt = True)
2798    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
2799            4, vcvt2sfxCode, toInt = True)
2800
# Fixed-point-to-float conversion snippet: destElem is produced by
# vfpUFixedToFpS(true, true, srcReg1, false, imm) between prepFpState and
# finishVfp; the FPSCR copy of FpscrExc is stored back at the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers around the conversion; the meaning of the boolean
# arguments is defined by vfpUFixedToFpS (not visible here).
# Registered as "vcvt" classes NVcvtu2fpD / NVcvtu2fpQ with fromInt = True.
2801    vcvtu2fpCode = '''
2802        FPSCR fpscr = (FPSCR) FpscrExc;
2803        VfpSavedState state = prepFpState(VfpRoundNearest);
2804        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2805        destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm);
2806        __asm__ __volatile__("" :: "m" (destElem));
2807        finishVfp(fpscr, state, true);
2808        FpscrExc = fpscr;
2809    '''
2810    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
2811            2, vcvtu2fpCode, fromInt = True)
2812    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
2813            4, vcvtu2fpCode, fromInt = True)
2814
# Fixed-point-to-float conversion snippet: destElem is produced by
# vfpSFixedToFpS(true, true, srcReg1, false, imm) between prepFpState and
# finishVfp; the FPSCR copy of FpscrExc is stored back at the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers around the conversion; the meaning of the boolean
# arguments is defined by vfpSFixedToFpS (not visible here).
# Registered as "vcvt" classes NVcvts2fpD / NVcvts2fpQ with fromInt = True.
2815    vcvts2fpCode = '''
2816        FPSCR fpscr = (FPSCR) FpscrExc;
2817        VfpSavedState state = prepFpState(VfpRoundNearest);
2818        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
2819        destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm);
2820        __asm__ __volatile__("" :: "m" (destElem));
2821        finishVfp(fpscr, state, true);
2822        FpscrExc = fpscr;
2823    '''
2824    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
2825            2, vcvts2fpCode, fromInt = True)
2826    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
2827            4, vcvts2fpCode, fromInt = True)
2828
# Single-to-half precision conversion snippet. srcElem1 is turned into a
# float with bitsToFp, fpscr.idc is set when flushToZero reports true for
# it, and destElem is produced by vcvtFpSFpH(fpscr, true, true,
# VfpRoundNearest, fpscr.ahp, srcFp1) between prepFpState and finishVfp. The
# FPSCR copy of FpscrExc is stored back at the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers around the conversion -- confirm.
# Registered once as "vcvt" class NVcvts2h via twoRegNarrowMiscInst for
# ("uint16_t",).
2829    vcvts2hCode = '''
2830        FPSCR fpscr = (FPSCR) FpscrExc;
2831        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
2832        if (flushToZero(srcFp1))
2833            fpscr.idc = 1;
2834        VfpSavedState state = prepFpState(VfpRoundNearest);
2835        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
2836                                : "m" (srcFp1), "m" (destElem));
2837        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
2838                              fpscr.ahp, srcFp1);
2839        __asm__ __volatile__("" :: "m" (destElem));
2840        finishVfp(fpscr, state, true);
2841        FpscrExc = fpscr;
2842    '''
2843    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
2844
# Half-to-single precision conversion snippet: destElem receives
# fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)), computed between
# prepFpState and finishVfp. The FPSCR copy of FpscrExc is stored back at
# the end.
# NOTE(review): the empty asm statements with "m" constraints presumably act
# as compiler barriers around the conversion -- confirm.
# Registered once as "vcvt" class NVcvth2s via twoRegLongMiscInst for
# ("uint16_t",).
2845    vcvth2sCode = '''
2846        FPSCR fpscr = (FPSCR) FpscrExc;
2847        VfpSavedState state = prepFpState(VfpRoundNearest);
2848        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
2849                                : "m" (srcElem1), "m" (destElem));
2850        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
2851        __asm__ __volatile__("" :: "m" (destElem));
2852        finishVfp(fpscr, state, true);
2853        FpscrExc = fpscr;
2854    '''
2855    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
2856
# Integer estimate snippet: destElem = unsignedRSqrtEstimate(srcElem1); the
# estimate itself is implemented by that helper (not visible here).
# Registered as "vrsqrte" classes NVrsqrteD / NVrsqrteQ for ("uint32_t",).
2857    vrsqrteCode = '''
2858        destElem = unsignedRSqrtEstimate(srcElem1);
2859    '''
2860    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
2861    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
2862
# Floating-point estimate snippet: sets fpscr.idc when flushToZero(srcReg1)
# returns true, then destReg = fprSqrtEstimate(fpscr, srcReg1). The FPSCR
# copy of FpscrExc is stored back at the end.
# Registered as "vrsqrte" classes NVrsqrteDFp / NVrsqrteQFp for ("float",).
2863    vrsqrtefpCode = '''
2864        FPSCR fpscr = (FPSCR) FpscrExc;
2865        if (flushToZero(srcReg1))
2866            fpscr.idc = 1;
2867        destReg = fprSqrtEstimate(fpscr, srcReg1);
2868        FpscrExc = fpscr;
2869    '''
2870    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
2871    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
2872
# Integer estimate snippet: destElem = unsignedRecipEstimate(srcElem1); the
# estimate itself is implemented by that helper (not visible here).
# Registered as "vrecpe" classes NVrecpeD / NVrecpeQ for ("uint32_t",) with
# the "SimdMultAccOp" op class.
2873    vrecpeCode = '''
2874        destElem = unsignedRecipEstimate(srcElem1);
2875    '''
2876    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
2877    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
2878
# Floating-point estimate snippet: sets fpscr.idc when flushToZero(srcReg1)
# returns true, then destReg = fpRecipEstimate(fpscr, srcReg1). The FPSCR
# copy of FpscrExc is stored back at the end.
# Registered as "vrecpe" classes NVrecpeDFp / NVrecpeQFp for ("float",).
2879    vrecpefpCode = '''
2880        FPSCR fpscr = (FPSCR) FpscrExc;
2881        if (flushToZero(srcReg1))
2882            fpscr.idc = 1;
2883        destReg = fpRecipEstimate(fpscr, srcReg1);
2884        FpscrExc = fpscr;
2885    '''
2886    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
2887    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
2888
# Element-reversal snippet for 2-byte groups: copies srcElem1 to destElem and
# sets j to i XOR (groupSize - 1), where groupSize is (1 << 1) divided by the
# element size in bytes.
# NOTE(review): i and j come from the surrounding instruction template (not
# visible here); j is presumably the destination element index -- confirm in
# twoRegMiscInst.
# Registered as "vrev16" classes NVrev16D / NVrev16Q for ("uint8_t",).
2889    vrev16Code = '''
2890        destElem = srcElem1;
2891        unsigned groupSize = ((1 << 1) / sizeof(Element));
2892        unsigned reverseMask = (groupSize - 1);
2893        j = i ^ reverseMask;
2894    '''
2895    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
2896    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
# Element-reversal snippet for 4-byte groups: copies srcElem1 to destElem and
# sets j to i XOR (groupSize - 1), where groupSize is (1 << 2) divided by the
# element size in bytes.
# NOTE(review): i and j come from the surrounding instruction template (not
# visible here); j is presumably the destination element index -- confirm in
# twoRegMiscInst.
# Registered as "vrev32" classes NVrev32D / NVrev32Q for
# ("uint8_t", "uint16_t").
2897    vrev32Code = '''
2898        destElem = srcElem1;
2899        unsigned groupSize = ((1 << 2) / sizeof(Element));
2900        unsigned reverseMask = (groupSize - 1);
2901        j = i ^ reverseMask;
2902    '''
2903    twoRegMiscInst("vrev32", "NVrev32D",
2904            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
2905    twoRegMiscInst("vrev32", "NVrev32Q",
2906            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
# Element-reversal snippet for 8-byte groups: copies srcElem1 to destElem and
# sets j to i XOR (groupSize - 1), where groupSize is (1 << 3) divided by the
# element size in bytes.
# NOTE(review): i and j come from the surrounding instruction template (not
# visible here); j is presumably the destination element index -- confirm in
# twoRegMiscInst.
# Registered as "vrev64" classes NVrev64D / NVrev64Q for smallUnsignedTypes.
2907    vrev64Code = '''
2908        destElem = srcElem1;
2909        unsigned groupSize = ((1 << 3) / sizeof(Element));
2910        unsigned reverseMask = (groupSize - 1);
2911        j = i ^ reverseMask;
2912    '''
2913    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
2914    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
2915
# Pairwise widening add snippet: destElem is the sum of srcElem1 and
# srcElem2, each converted to BigElement before the addition.
# Registered as "vpaddl" classes NVpaddlD / NVpaddlQ via twoRegCondenseInst
# for smallTypes.
2916    vpaddlCode = '''
2917        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2918    '''
2919    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
2920    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
2921
# Pairwise widening add-accumulate snippet: adds the sum of srcElem1 and
# srcElem2 (each converted to BigElement) into destElem.
# Registered as "vpadal" classes NVpadalD / NVpadalQ via twoRegCondenseInst
# for smallTypes; the trailing True presumably marks destElem as an input --
# confirm in twoRegCondenseInst.
2922    vpadalCode = '''
2923        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
2924    '''
2925    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
2926    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
2927
# Count-leading-sign-bits snippet. The source is shifted left once to drop
# the sign bit, then count is incremented for every further leading bit that
# still matches the original sign (negative: value stays < 0; non-negative:
# value stays >= 0), capped at (element width - 1). destElem receives count.
# Registered as "vcls" classes NVclsD / NVclsQ for signedTypes.
2928    vclsCode = '''
2929        unsigned count = 0;
2930        if (srcElem1 < 0) {
2931            srcElem1 <<= 1;
2932            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
2933                count++;
2934                srcElem1 <<= 1;
2935            }
2936        } else {
2937            srcElem1 <<= 1;
2938            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
2939                count++;
2940                srcElem1 <<= 1;
2941            }
2942        }
2943        destElem = count;
2944    '''
2945    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
2946    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
2947
# Count-leading-zeros snippet: while the (signed) source is non-negative,
# i.e. its top bit is clear, count is incremented and the source shifted
# left by one, up to the element width. destElem receives count.
# Registered as "vclz" classes NVclzD / NVclzQ for signedTypes.
2948    vclzCode = '''
2949        unsigned count = 0;
2950        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
2951            count++;
2952            srcElem1 <<= 1;
2953        }
2954        destElem = count;
2955    '''
2956    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
2957    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
2958
# Population-count snippet: adds the low bit of the source to count and
# shifts the source right by one until it becomes zero (or the element width
# is reached). destElem receives count.
# Registered as "vcnt" classes NVcntD / NVcntQ for unsignedTypes.
2959    vcntCode = '''
2960        unsigned count = 0;
2961        while (srcElem1 && count < sizeof(Element) * 8) {
2962            count += srcElem1 & 0x1;
2963            srcElem1 >>= 1;
2964        }
2965        destElem = count;
2966    '''
2967
2968    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
2969    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
2970
# Bitwise-NOT snippet: destElem = ~srcElem1.
# Registered as "vmvn" classes NVmvnD / NVmvnQ for ("uint64_t",) only.
2971    vmvnCode = '''
2972        destElem = ~srcElem1;
2973    '''
2974    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
2975    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
2976
# Saturating absolute-value snippet. When the source equals the sign-bit-only
# pattern (1 << (width - 1)) the result is its complement and fpscr.qc is
# set; otherwise a negative source is negated and a non-negative one copied.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered as "vqabs" classes NVqabsD / NVqabsQ for signedTypes.
2977    vqabsCode = '''
2978        FPSCR fpscr = (FPSCR) FpscrQc;
2979        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2980            fpscr.qc = 1;
2981            destElem = ~srcElem1;
2982        } else if (srcElem1 < 0) {
2983            destElem = -srcElem1;
2984        } else {
2985            destElem = srcElem1;
2986        }
2987        FpscrQc = fpscr;
2988    '''
2989    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
2990    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
2991
# Saturating negate snippet. When the source equals the sign-bit-only pattern
# (1 << (width - 1)) the result is its complement and fpscr.qc is set;
# otherwise the result is -srcElem1.
# The FPSCR copy is read from FpscrQc and stored back at the end.
# Registered as "vqneg" classes NVqnegD / NVqnegQ for signedTypes.
2992    vqnegCode = '''
2993        FPSCR fpscr = (FPSCR) FpscrQc;
2994        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
2995            fpscr.qc = 1;
2996            destElem = ~srcElem1;
2997        } else {
2998            destElem = -srcElem1;
2999        }
3000        FpscrQc = fpscr;
3001    '''
3002    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3003    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3004
# Integer absolute-value snippet without saturation: a negative source is
# negated, anything else is copied.
# Registered as "vabs" classes NVabsD / NVabsQ for signedTypes.
3005    vabsCode = '''
3006        if (srcElem1 < 0) {
3007            destElem = -srcElem1;
3008        } else {
3009            destElem = srcElem1;
3010        }
3011    '''
3012
3013    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3014    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
# Floating-point absolute-value snippet done on the raw bits: srcReg1 is
# stored into a uint32_t/float union, the integer view is AND-ed with
# mask(sizeof(Element) * 8 - 1) to clear the top bit, and the float view is
# written to destReg. FPSCR is not touched.
# Registered as "vabs" classes NVabsDFp / NVabsQFp for ("float",).
3015    vabsfpCode = '''
3016        union
3017        {
3018            uint32_t i;
3019            float f;
3020        } cStruct;
3021        cStruct.f = srcReg1;
3022        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3023        destReg = cStruct.f;
3024    '''
3025    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3026    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3027
# Integer negate snippet: destElem = -srcElem1, with no saturation.
# Registered as "vneg" classes NVnegD / NVnegQ for signedTypes.
3028    vnegCode = '''
3029        destElem = -srcElem1;
3030    '''
3031    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3032    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
# Floating-point negate snippet: destReg = -srcReg1. FPSCR is not touched.
# Registered as "vneg" classes NVnegDFp / NVnegQFp for ("float",).
3033    vnegfpCode = '''
3034        destReg = -srcReg1;
3035    '''
3036    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3037    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3038
3039    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3040    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3041    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3042    vcgtfpCode = '''
3043        FPSCR fpscr = (FPSCR) FpscrExc;
3044        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3045                             true, true, VfpRoundNearest);
3046        destReg = (res == 0) ? -1 : 0;
3047        if (res == 2.0)
3048            fpscr.ioc = 1;
3049        FpscrExc = fpscr;
3050    '''
3051    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3052            2, vcgtfpCode, toInt = True)
3053    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3054            4, vcgtfpCode, toInt = True)
3055
3056    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3057    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3058    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3059    vcgefpCode = '''
3060        FPSCR fpscr = (FPSCR) FpscrExc;
3061        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3062                             true, true, VfpRoundNearest);
3063        destReg = (res == 0) ? -1 : 0;
3064        if (res == 2.0)
3065            fpscr.ioc = 1;
3066        FpscrExc = fpscr;
3067    '''
3068    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3069            2, vcgefpCode, toInt = True)
3070    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3071            4, vcgefpCode, toInt = True)
3072
3073    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3074    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3075    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3076    vceqfpCode = '''
3077        FPSCR fpscr = (FPSCR) FpscrExc;
3078        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3079                             true, true, VfpRoundNearest);
3080        destReg = (res == 0) ? -1 : 0;
3081        if (res == 2.0)
3082            fpscr.ioc = 1;
3083        FpscrExc = fpscr;
3084    '''
3085    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3086            2, vceqfpCode, toInt = True)
3087    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3088            4, vceqfpCode, toInt = True)
3089
3090    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3091    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3092    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3093    vclefpCode = '''
3094        FPSCR fpscr = (FPSCR) FpscrExc;
3095        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3096                             true, true, VfpRoundNearest);
3097        destReg = (res == 0) ? -1 : 0;
3098        if (res == 2.0)
3099            fpscr.ioc = 1;
3100        FpscrExc = fpscr;
3101    '''
3102    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3103            2, vclefpCode, toInt = True)
3104    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3105            4, vclefpCode, toInt = True)
3106
3107    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3108    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3109    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3110    vcltfpCode = '''
3111        FPSCR fpscr = (FPSCR) FpscrExc;
3112        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3113                             true, true, VfpRoundNearest);
3114        destReg = (res == 0) ? -1 : 0;
3115        if (res == 2.0)
3116            fpscr.ioc = 1;
3117        FpscrExc = fpscr;
3118    '''
3119    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3120            2, vcltfpCode, toInt = True)
3121    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3122            4, vcltfpCode, toInt = True)
3123
3124    vswpCode = '''
3125        FloatRegBits mid;
3126        for (unsigned r = 0; r < rCount; r++) {
3127            mid = srcReg1.regs[r];
3128            srcReg1.regs[r] = destReg.regs[r];
3129            destReg.regs[r] = mid;
3130        }
3131    '''
3132    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3133    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3134
3135    vtrnCode = '''
3136        Element mid;
3137        for (unsigned i = 0; i < eCount; i += 2) {
3138            mid = srcReg1.elements[i];
3139            srcReg1.elements[i] = destReg.elements[i + 1];
3140            destReg.elements[i + 1] = mid;
3141        }
3142    '''
3143    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp", unsignedTypes, 2, vtrnCode)
3144    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp", unsignedTypes, 4, vtrnCode)
3145
3146    vuzpCode = '''
3147        Element mid[eCount];
3148        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3149        for (unsigned i = 0; i < eCount / 2; i++) {
3150            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3151            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3152            destReg.elements[i] = destReg.elements[2 * i];
3153        }
3154        for (unsigned i = 0; i < eCount / 2; i++) {
3155            destReg.elements[eCount / 2 + i] = mid[2 * i];
3156        }
3157    '''
3158    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3159    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3160
3161    vzipCode = '''
3162        Element mid[eCount];
3163        memcpy(&mid, &destReg, sizeof(destReg));
3164        for (unsigned i = 0; i < eCount / 2; i++) {
3165            destReg.elements[2 * i] = mid[i];
3166            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3167        }
3168        for (int i = 0; i < eCount / 2; i++) {
3169            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3170            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3171        }
3172    '''
3173    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3174    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3175
3176    vmovnCode = 'destElem = srcElem1;'
3177    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3178
3179    vdupCode = 'destElem = srcElem1;'
3180    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3181    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3182
3183    def vdupGprInst(name, Name, opClass, types, rCount):
3184        global header_output, exec_output
3185        eWalkCode = '''
3186        RegVect destReg;
3187        for (unsigned i = 0; i < eCount; i++) {
3188            destReg.elements[i] = htog((Element)Op1);
3189        }
3190        '''
3191        for reg in range(rCount):
3192            eWalkCode += '''
3193            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3194            ''' % { "reg" : reg }
3195        iop = InstObjParams(name, Name,
3196                            "RegRegOp",
3197                            { "code": eWalkCode,
3198                              "r_count": rCount,
3199                              "predicate_test": predicateTest,
3200                              "op_class": opClass }, [])
3201        header_output += NeonRegRegOpDeclare.subst(iop)
3202        exec_output += NeonEqualRegExecute.subst(iop)
3203        for type in types:
3204            substDict = { "targs" : type,
3205                          "class_name" : Name }
3206            exec_output += NeonExecDeclare.subst(substDict)
3207    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3208    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3209
3210    vmovCode = 'destElem = imm;'
3211    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3212    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3213
3214    vorrCode = 'destElem |= imm;'
3215    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3216    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3217
3218    vmvnCode = 'destElem = ~imm;'
3219    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3220    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3221
3222    vbicCode = 'destElem &= ~imm;'
3223    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3224    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3225
3226    vqmovnCode = '''
3227    FPSCR fpscr = (FPSCR) FpscrQc;
3228    destElem = srcElem1;
3229    if ((BigElement)destElem != srcElem1) {
3230        fpscr.qc = 1;
3231        destElem = mask(sizeof(Element) * 8 - 1);
3232        if (srcElem1 < 0)
3233            destElem = ~destElem;
3234    }
3235    FpscrQc = fpscr;
3236    '''
3237    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3238
3239    vqmovunCode = '''
3240    FPSCR fpscr = (FPSCR) FpscrQc;
3241    destElem = srcElem1;
3242    if ((BigElement)destElem != srcElem1) {
3243        fpscr.qc = 1;
3244        destElem = mask(sizeof(Element) * 8);
3245    }
3246    FpscrQc = fpscr;
3247    '''
3248    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3249            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3250
3251    vqmovunsCode = '''
3252    FPSCR fpscr = (FPSCR) FpscrQc;
3253    destElem = srcElem1;
3254    if (srcElem1 < 0 ||
3255            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3256        fpscr.qc = 1;
3257        destElem = mask(sizeof(Element) * 8);
3258        if (srcElem1 < 0)
3259            destElem = ~destElem;
3260    }
3261    FpscrQc = fpscr;
3262    '''
3263    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3264            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3265
3266    def buildVext(name, Name, opClass, types, rCount, op):
3267        global header_output, exec_output
3268        eWalkCode = '''
3269        RegVect srcReg1, srcReg2, destReg;
3270        '''
3271        for reg in range(rCount):
3272            eWalkCode += simdEnabledCheckCode + '''
3273                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3274                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3275            ''' % { "reg" : reg }
3276        eWalkCode += op
3277        for reg in range(rCount):
3278            eWalkCode += '''
3279            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3280            ''' % { "reg" : reg }
3281        iop = InstObjParams(name, Name,
3282                            "RegRegRegImmOp",
3283                            { "code": eWalkCode,
3284                              "r_count": rCount,
3285                              "predicate_test": predicateTest,
3286                              "op_class": opClass }, [])
3287        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3288        exec_output += NeonEqualRegExecute.subst(iop)
3289        for type in types:
3290            substDict = { "targs" : type,
3291                          "class_name" : Name }
3292            exec_output += NeonExecDeclare.subst(substDict)
3293
3294    vextCode = '''
3295        for (unsigned i = 0; i < eCount; i++) {
3296            unsigned index = i + imm;
3297            if (index < eCount) {
3298                destReg.elements[i] = srcReg1.elements[index];
3299            } else {
3300                index -= eCount;
3301                if (index >= eCount)
3302#if FULL_SYSTEM
3303                    fault = new UndefinedInstruction;
3304#else
3305                    fault = new UndefinedInstruction(false, mnemonic);
3306#endif
3307                else
3308                    destReg.elements[i] = srcReg2.elements[index];
3309            }
3310        }
3311    '''
3312    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3313    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3314
3315    def buildVtbxl(name, Name, opClass, length, isVtbl):
3316        global header_output, decoder_output, exec_output
3317        code = '''
3318            union
3319            {
3320                uint8_t bytes[32];
3321                FloatRegBits regs[8];
3322            } table;
3323
3324            union
3325            {
3326                uint8_t bytes[8];
3327                FloatRegBits regs[2];
3328            } destReg, srcReg2;
3329
3330            const unsigned length = %(length)d;
3331            const bool isVtbl = %(isVtbl)s;
3332
3333            srcReg2.regs[0] = htog(FpOp2P0_uw);
3334            srcReg2.regs[1] = htog(FpOp2P1_uw);
3335
3336            destReg.regs[0] = htog(FpDestP0_uw);
3337            destReg.regs[1] = htog(FpDestP1_uw);
3338        ''' % { "length" : length, "isVtbl" : isVtbl }
3339        for reg in range(8):
3340            if reg < length * 2:
3341                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3342                        { "reg" : reg }
3343            else:
3344                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3345        code += '''
3346        for (unsigned i = 0; i < sizeof(destReg); i++) {
3347            uint8_t index = srcReg2.bytes[i];
3348            if (index < 8 * length) {
3349                destReg.bytes[i] = table.bytes[index];
3350            } else {
3351                if (isVtbl)
3352                    destReg.bytes[i] = 0;
3353                // else destReg.bytes[i] unchanged
3354            }
3355        }
3356
3357        FpDestP0_uw = gtoh(destReg.regs[0]);
3358        FpDestP1_uw = gtoh(destReg.regs[1]);
3359        '''
3360        iop = InstObjParams(name, Name,
3361                            "RegRegRegOp",
3362                            { "code": code,
3363                              "predicate_test": predicateTest,
3364                              "op_class": opClass }, [])
3365        header_output += RegRegRegOpDeclare.subst(iop)
3366        decoder_output += RegRegRegOpConstructor.subst(iop)
3367        exec_output += PredOpExecute.subst(iop)
3368
3369    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3370    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3371    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3372    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3373
3374    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3375    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3376    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3377    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3378}};
3379