neon.isa revision 12038
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061let {{
1062    header_output = ""
1063    exec_output = ""
1064
1065    vcompares = '''
1066    static float
1067    vcgtFunc(float op1, float op2)
1068    {
1069        if (std::isnan(op1) || std::isnan(op2))
1070            return 2.0;
1071        return (op1 > op2) ? 0.0 : 1.0;
1072    }
1073
1074    static float
1075    vcgeFunc(float op1, float op2)
1076    {
1077        if (std::isnan(op1) || std::isnan(op2))
1078            return 2.0;
1079        return (op1 >= op2) ? 0.0 : 1.0;
1080    }
1081
1082    static float
1083    vceqFunc(float op1, float op2)
1084    {
1085        if (isSnan(op1) || isSnan(op2))
1086            return 2.0;
1087        return (op1 == op2) ? 0.0 : 1.0;
1088    }
1089'''
1090    vcomparesL = '''
1091    static float
1092    vcleFunc(float op1, float op2)
1093    {
1094        if (std::isnan(op1) || std::isnan(op2))
1095            return 2.0;
1096        return (op1 <= op2) ? 0.0 : 1.0;
1097    }
1098
1099    static float
1100    vcltFunc(float op1, float op2)
1101    {
1102        if (std::isnan(op1) || std::isnan(op2))
1103            return 2.0;
1104        return (op1 < op2) ? 0.0 : 1.0;
1105    }
1106'''
1107    vacomparesG = '''
1108    static float
1109    vacgtFunc(float op1, float op2)
1110    {
1111        if (std::isnan(op1) || std::isnan(op2))
1112            return 2.0;
1113        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1114    }
1115
1116    static float
1117    vacgeFunc(float op1, float op2)
1118    {
1119        if (std::isnan(op1) || std::isnan(op2))
1120            return 2.0;
1121        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1122    }
1123'''
1124
1125    exec_output += vcompares + vacomparesG
1126
1127    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1128    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1129    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1130    signedTypes = smallSignedTypes + ("int64_t",)
1131    smallTypes = smallUnsignedTypes + smallSignedTypes
1132    allTypes = unsignedTypes + signedTypes
1133
1134    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135                          readDest=False, pairwise=False):
1136        global header_output, exec_output
1137        eWalkCode = simdEnabledCheckCode + '''
1138        RegVect srcReg1, srcReg2, destReg;
1139        '''
1140        for reg in range(rCount):
1141            eWalkCode += '''
1142                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1143                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1144            ''' % { "reg" : reg }
1145            if readDest:
1146                eWalkCode += '''
1147                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1148                ''' % { "reg" : reg }
1149        readDestCode = ''
1150        if readDest:
1151            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1152        if pairwise:
1153            eWalkCode += '''
1154            for (unsigned i = 0; i < eCount; i++) {
1155                Element srcElem1 = gtoh(2 * i < eCount ?
1156                                        srcReg1.elements[2 * i] :
1157                                        srcReg2.elements[2 * i - eCount]);
1158                Element srcElem2 = gtoh(2 * i < eCount ?
1159                                        srcReg1.elements[2 * i + 1] :
1160                                        srcReg2.elements[2 * i + 1 - eCount]);
1161                Element destElem;
1162                %(readDest)s
1163                %(op)s
1164                destReg.elements[i] = htog(destElem);
1165            }
1166            ''' % { "op" : op, "readDest" : readDestCode }
1167        else:
1168            eWalkCode += '''
1169            for (unsigned i = 0; i < eCount; i++) {
1170                Element srcElem1 = gtoh(srcReg1.elements[i]);
1171                Element srcElem2 = gtoh(srcReg2.elements[i]);
1172                Element destElem;
1173                %(readDest)s
1174                %(op)s
1175                destReg.elements[i] = htog(destElem);
1176            }
1177            ''' % { "op" : op, "readDest" : readDestCode }
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1181            ''' % { "reg" : reg }
1182        iop = InstObjParams(name, Name,
1183                            "RegRegRegOp",
1184                            { "code": eWalkCode,
1185                              "r_count": rCount,
1186                              "predicate_test": predicateTest,
1187                              "op_class": opClass }, [])
1188        header_output += NeonRegRegRegOpDeclare.subst(iop)
1189        exec_output += NeonEqualRegExecute.subst(iop)
1190        for type in types:
1191            substDict = { "targs" : type,
1192                          "class_name" : Name }
1193            exec_output += NeonExecDeclare.subst(substDict)
1194
1195    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1196                            readDest=False, pairwise=False, toInt=False):
1197        global header_output, exec_output
1198        eWalkCode = simdEnabledCheckCode + '''
1199        typedef FloatReg FloatVect[rCount];
1200        FloatVect srcRegs1, srcRegs2;
1201        '''
1202        if toInt:
1203            eWalkCode += 'RegVect destRegs;\n'
1204        else:
1205            eWalkCode += 'FloatVect destRegs;\n'
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1209                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1210            ''' % { "reg" : reg }
1211            if readDest:
1212                if toInt:
1213                    eWalkCode += '''
1214                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1215                    ''' % { "reg" : reg }
1216                else:
1217                    eWalkCode += '''
1218                        destRegs[%(reg)d] = FpDestP%(reg)d;
1219                    ''' % { "reg" : reg }
1220        readDestCode = ''
1221        if readDest:
1222            readDestCode = 'destReg = destRegs[r];'
1223        destType = 'FloatReg'
1224        writeDest = 'destRegs[r] = destReg;'
1225        if toInt:
1226            destType = 'FloatRegBits'
1227            writeDest = 'destRegs.regs[r] = destReg;'
1228        if pairwise:
1229            eWalkCode += '''
1230            for (unsigned r = 0; r < rCount; r++) {
1231                FloatReg srcReg1 = (2 * r < rCount) ?
1232                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1233                FloatReg srcReg2 = (2 * r < rCount) ?
1234                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1235                %(destType)s destReg;
1236                %(readDest)s
1237                %(op)s
1238                %(writeDest)s
1239            }
1240            ''' % { "op" : op,
1241                    "readDest" : readDestCode,
1242                    "destType" : destType,
1243                    "writeDest" : writeDest }
1244        else:
1245            eWalkCode += '''
1246            for (unsigned r = 0; r < rCount; r++) {
1247                FloatReg srcReg1 = srcRegs1[r];
1248                FloatReg srcReg2 = srcRegs2[r];
1249                %(destType)s destReg;
1250                %(readDest)s
1251                %(op)s
1252                %(writeDest)s
1253            }
1254            ''' % { "op" : op,
1255                    "readDest" : readDestCode,
1256                    "destType" : destType,
1257                    "writeDest" : writeDest }
1258        for reg in range(rCount):
1259            if toInt:
1260                eWalkCode += '''
1261                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1262                ''' % { "reg" : reg }
1263            else:
1264                eWalkCode += '''
1265                FpDestP%(reg)d = destRegs[%(reg)d];
1266                ''' % { "reg" : reg }
1267        iop = InstObjParams(name, Name,
1268                            "FpRegRegRegOp",
1269                            { "code": eWalkCode,
1270                              "r_count": rCount,
1271                              "predicate_test": predicateTest,
1272                              "op_class": opClass }, [])
1273        header_output += NeonRegRegRegOpDeclare.subst(iop)
1274        exec_output += NeonEqualRegExecute.subst(iop)
1275        for type in types:
1276            substDict = { "targs" : type,
1277                          "class_name" : Name }
1278            exec_output += NeonExecDeclare.subst(substDict)
1279
1280    def threeUnequalRegInst(name, Name, opClass, types, op,
1281                            bigSrc1, bigSrc2, bigDest, readDest):
1282        global header_output, exec_output
1283        src1Cnt = src2Cnt = destCnt = 2
1284        src1Prefix = src2Prefix = destPrefix = ''
1285        if bigSrc1:
1286            src1Cnt = 4
1287            src1Prefix = 'Big'
1288        if bigSrc2:
1289            src2Cnt = 4
1290            src2Prefix = 'Big'
1291        if bigDest:
1292            destCnt = 4
1293            destPrefix = 'Big'
1294        eWalkCode = simdEnabledCheckCode + '''
1295            %sRegVect srcReg1;
1296            %sRegVect srcReg2;
1297            %sRegVect destReg;
1298        ''' % (src1Prefix, src2Prefix, destPrefix)
1299        for reg in range(src1Cnt):
1300            eWalkCode += '''
1301                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1302            ''' % { "reg" : reg }
1303        for reg in range(src2Cnt):
1304            eWalkCode += '''
1305                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1306            ''' % { "reg" : reg }
1307        if readDest:
1308            for reg in range(destCnt):
1309                eWalkCode += '''
1310                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1311                ''' % { "reg" : reg }
1312        readDestCode = ''
1313        if readDest:
1314            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1315        eWalkCode += '''
1316        for (unsigned i = 0; i < eCount; i++) {
1317            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1318            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1319            %(destPrefix)sElement destElem;
1320            %(readDest)s
1321            %(op)s
1322            destReg.elements[i] = htog(destElem);
1323        }
1324        ''' % { "op" : op, "readDest" : readDestCode,
1325                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1326                "destPrefix" : destPrefix }
1327        for reg in range(destCnt):
1328            eWalkCode += '''
1329            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1330            ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "RegRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": 2,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegRegOpDeclare.subst(iop)
1338        exec_output += NeonUnequalRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1345        threeUnequalRegInst(name, Name, opClass, types, op,
1346                            True, True, False, readDest)
1347
1348    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1349        threeUnequalRegInst(name, Name, opClass, types, op,
1350                            False, False, True, readDest)
1351
1352    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1353        threeUnequalRegInst(name, Name, opClass, types, op,
1354                            True, False, True, readDest)
1355
1356    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = simdEnabledCheckCode + '''
1359        RegVect srcReg1, srcReg2, destReg;
1360        '''
1361        for reg in range(rCount):
1362            eWalkCode += '''
1363                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1364                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1365            ''' % { "reg" : reg }
1366            if readDest:
1367                eWalkCode += '''
1368                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1369                ''' % { "reg" : reg }
1370        readDestCode = ''
1371        if readDest:
1372            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1373        eWalkCode += '''
1374        if (imm < 0 && imm >= eCount) {
1375            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1376                                                           mnemonic);
1377        } else {
1378            for (unsigned i = 0; i < eCount; i++) {
1379                Element srcElem1 = gtoh(srcReg1.elements[i]);
1380                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1381                Element destElem;
1382                %(readDest)s
1383                %(op)s
1384                destReg.elements[i] = htog(destElem);
1385            }
1386        }
1387        ''' % { "op" : op, "readDest" : readDestCode }
1388        for reg in range(rCount):
1389            eWalkCode += '''
1390            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1391            ''' % { "reg" : reg }
1392        iop = InstObjParams(name, Name,
1393                            "RegRegRegImmOp",
1394                            { "code": eWalkCode,
1395                              "r_count": rCount,
1396                              "predicate_test": predicateTest,
1397                              "op_class": opClass }, [])
1398        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1399        exec_output += NeonEqualRegExecute.subst(iop)
1400        for type in types:
1401            substDict = { "targs" : type,
1402                          "class_name" : Name }
1403            exec_output += NeonExecDeclare.subst(substDict)
1404
1405    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1406        global header_output, exec_output
1407        rCount = 2
1408        eWalkCode = simdEnabledCheckCode + '''
1409        RegVect srcReg1, srcReg2;
1410        BigRegVect destReg;
1411        '''
1412        for reg in range(rCount):
1413            eWalkCode += '''
1414                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1415                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1416            ''' % { "reg" : reg }
1417        if readDest:
1418            for reg in range(2 * rCount):
1419                eWalkCode += '''
1420                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1421                ''' % { "reg" : reg }
1422        readDestCode = ''
1423        if readDest:
1424            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1425        eWalkCode += '''
1426        if (imm < 0 && imm >= eCount) {
1427            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1428                                                          mnemonic);
1429        } else {
1430            for (unsigned i = 0; i < eCount; i++) {
1431                Element srcElem1 = gtoh(srcReg1.elements[i]);
1432                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1433                BigElement destElem;
1434                %(readDest)s
1435                %(op)s
1436                destReg.elements[i] = htog(destElem);
1437            }
1438        }
1439        ''' % { "op" : op, "readDest" : readDestCode }
1440        for reg in range(2 * rCount):
1441            eWalkCode += '''
1442            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1443            ''' % { "reg" : reg }
1444        iop = InstObjParams(name, Name,
1445                            "RegRegRegImmOp",
1446                            { "code": eWalkCode,
1447                              "r_count": rCount,
1448                              "predicate_test": predicateTest,
1449                              "op_class": opClass }, [])
1450        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1451        exec_output += NeonUnequalRegExecute.subst(iop)
1452        for type in types:
1453            substDict = { "targs" : type,
1454                          "class_name" : Name }
1455            exec_output += NeonExecDeclare.subst(substDict)
1456
1457    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1458        global header_output, exec_output
1459        eWalkCode = simdEnabledCheckCode + '''
1460        typedef FloatReg FloatVect[rCount];
1461        FloatVect srcRegs1, srcRegs2, destRegs;
1462        '''
1463        for reg in range(rCount):
1464            eWalkCode += '''
1465                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1466                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1467            ''' % { "reg" : reg }
1468            if readDest:
1469                eWalkCode += '''
1470                    destRegs[%(reg)d] = FpDestP%(reg)d;
1471                ''' % { "reg" : reg }
1472        readDestCode = ''
1473        if readDest:
1474            readDestCode = 'destReg = destRegs[i];'
1475        eWalkCode += '''
1476        if (imm < 0 && imm >= eCount) {
1477            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1478                                                           mnemonic);
1479        } else {
1480            for (unsigned i = 0; i < rCount; i++) {
1481                FloatReg srcReg1 = srcRegs1[i];
1482                FloatReg srcReg2 = srcRegs2[imm];
1483                FloatReg destReg;
1484                %(readDest)s
1485                %(op)s
1486                destRegs[i] = destReg;
1487            }
1488        }
1489        ''' % { "op" : op, "readDest" : readDestCode }
1490        for reg in range(rCount):
1491            eWalkCode += '''
1492            FpDestP%(reg)d = destRegs[%(reg)d];
1493            ''' % { "reg" : reg }
1494        iop = InstObjParams(name, Name,
1495                            "FpRegRegRegImmOp",
1496                            { "code": eWalkCode,
1497                              "r_count": rCount,
1498                              "predicate_test": predicateTest,
1499                              "op_class": opClass }, [])
1500        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1501        exec_output += NeonEqualRegExecute.subst(iop)
1502        for type in types:
1503            substDict = { "targs" : type,
1504                          "class_name" : Name }
1505            exec_output += NeonExecDeclare.subst(substDict)
1506
1507    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1508            readDest=False, toInt=False, fromInt=False):
1509        global header_output, exec_output
1510        eWalkCode = simdEnabledCheckCode + '''
1511        RegVect srcRegs1, destRegs;
1512        '''
1513        for reg in range(rCount):
1514            eWalkCode += '''
1515                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1516            ''' % { "reg" : reg }
1517            if readDest:
1518                eWalkCode += '''
1519                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1520                ''' % { "reg" : reg }
1521        readDestCode = ''
1522        if readDest:
1523            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1524            if toInt:
1525                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1526        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1527        if fromInt:
1528            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1529        declDest = 'Element destElem;'
1530        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1531        if toInt:
1532            declDest = 'FloatRegBits destReg;'
1533            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1534        eWalkCode += '''
1535        for (unsigned i = 0; i < eCount; i++) {
1536            %(readOp)s
1537            %(declDest)s
1538            %(readDest)s
1539            %(op)s
1540            %(writeDest)s
1541        }
1542        ''' % { "readOp" : readOpCode,
1543                "declDest" : declDest,
1544                "readDest" : readDestCode,
1545                "op" : op,
1546                "writeDest" : writeDestCode }
1547        for reg in range(rCount):
1548            eWalkCode += '''
1549            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1550            ''' % { "reg" : reg }
1551        iop = InstObjParams(name, Name,
1552                            "RegRegImmOp",
1553                            { "code": eWalkCode,
1554                              "r_count": rCount,
1555                              "predicate_test": predicateTest,
1556                              "op_class": opClass }, [])
1557        header_output += NeonRegRegImmOpDeclare.subst(iop)
1558        exec_output += NeonEqualRegExecute.subst(iop)
1559        for type in types:
1560            substDict = { "targs" : type,
1561                          "class_name" : Name }
1562            exec_output += NeonExecDeclare.subst(substDict)
1563
1564    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1565        global header_output, exec_output
1566        eWalkCode = simdEnabledCheckCode + '''
1567        BigRegVect srcReg1;
1568        RegVect destReg;
1569        '''
1570        for reg in range(4):
1571            eWalkCode += '''
1572                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1573            ''' % { "reg" : reg }
1574        if readDest:
1575            for reg in range(2):
1576                eWalkCode += '''
1577                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1578                ''' % { "reg" : reg }
1579        readDestCode = ''
1580        if readDest:
1581            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1582        eWalkCode += '''
1583        for (unsigned i = 0; i < eCount; i++) {
1584            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1585            Element destElem;
1586            %(readDest)s
1587            %(op)s
1588            destReg.elements[i] = htog(destElem);
1589        }
1590        ''' % { "op" : op, "readDest" : readDestCode }
1591        for reg in range(2):
1592            eWalkCode += '''
1593            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1594            ''' % { "reg" : reg }
1595        iop = InstObjParams(name, Name,
1596                            "RegRegImmOp",
1597                            { "code": eWalkCode,
1598                              "r_count": 2,
1599                              "predicate_test": predicateTest,
1600                              "op_class": opClass }, [])
1601        header_output += NeonRegRegImmOpDeclare.subst(iop)
1602        exec_output += NeonUnequalRegExecute.subst(iop)
1603        for type in types:
1604            substDict = { "targs" : type,
1605                          "class_name" : Name }
1606            exec_output += NeonExecDeclare.subst(substDict)
1607
1608    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1609        global header_output, exec_output
1610        eWalkCode = simdEnabledCheckCode + '''
1611        RegVect srcReg1;
1612        BigRegVect destReg;
1613        '''
1614        for reg in range(2):
1615            eWalkCode += '''
1616                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1617            ''' % { "reg" : reg }
1618        if readDest:
1619            for reg in range(4):
1620                eWalkCode += '''
1621                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1622                ''' % { "reg" : reg }
1623        readDestCode = ''
1624        if readDest:
1625            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1626        eWalkCode += '''
1627        for (unsigned i = 0; i < eCount; i++) {
1628            Element srcElem1 = gtoh(srcReg1.elements[i]);
1629            BigElement destElem;
1630            %(readDest)s
1631            %(op)s
1632            destReg.elements[i] = htog(destElem);
1633        }
1634        ''' % { "op" : op, "readDest" : readDestCode }
1635        for reg in range(4):
1636            eWalkCode += '''
1637            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1638            ''' % { "reg" : reg }
1639        iop = InstObjParams(name, Name,
1640                            "RegRegImmOp",
1641                            { "code": eWalkCode,
1642                              "r_count": 2,
1643                              "predicate_test": predicateTest,
1644                              "op_class": opClass }, [])
1645        header_output += NeonRegRegImmOpDeclare.subst(iop)
1646        exec_output += NeonUnequalRegExecute.subst(iop)
1647        for type in types:
1648            substDict = { "targs" : type,
1649                          "class_name" : Name }
1650            exec_output += NeonExecDeclare.subst(substDict)
1651
1652    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1653        global header_output, exec_output
1654        eWalkCode = simdEnabledCheckCode + '''
1655        RegVect srcReg1, destReg;
1656        '''
1657        for reg in range(rCount):
1658            eWalkCode += '''
1659                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1660            ''' % { "reg" : reg }
1661            if readDest:
1662                eWalkCode += '''
1663                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1664                ''' % { "reg" : reg }
1665        readDestCode = ''
1666        if readDest:
1667            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1668        eWalkCode += '''
1669        for (unsigned i = 0; i < eCount; i++) {
1670            unsigned j = i;
1671            Element srcElem1 = gtoh(srcReg1.elements[i]);
1672            Element destElem;
1673            %(readDest)s
1674            %(op)s
1675            destReg.elements[j] = htog(destElem);
1676        }
1677        ''' % { "op" : op, "readDest" : readDestCode }
1678        for reg in range(rCount):
1679            eWalkCode += '''
1680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1681            ''' % { "reg" : reg }
1682        iop = InstObjParams(name, Name,
1683                            "RegRegOp",
1684                            { "code": eWalkCode,
1685                              "r_count": rCount,
1686                              "predicate_test": predicateTest,
1687                              "op_class": opClass }, [])
1688        header_output += NeonRegRegOpDeclare.subst(iop)
1689        exec_output += NeonEqualRegExecute.subst(iop)
1690        for type in types:
1691            substDict = { "targs" : type,
1692                          "class_name" : Name }
1693            exec_output += NeonExecDeclare.subst(substDict)
1694
1695    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1696        global header_output, exec_output
1697        eWalkCode = simdEnabledCheckCode + '''
1698        RegVect srcReg1, destReg;
1699        '''
1700        for reg in range(rCount):
1701            eWalkCode += '''
1702                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1703            ''' % { "reg" : reg }
1704            if readDest:
1705                eWalkCode += '''
1706                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1707                ''' % { "reg" : reg }
1708        readDestCode = ''
1709        if readDest:
1710            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1711        eWalkCode += '''
1712        for (unsigned i = 0; i < eCount; i++) {
1713            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1714            Element destElem;
1715            %(readDest)s
1716            %(op)s
1717            destReg.elements[i] = htog(destElem);
1718        }
1719        ''' % { "op" : op, "readDest" : readDestCode }
1720        for reg in range(rCount):
1721            eWalkCode += '''
1722            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1723            ''' % { "reg" : reg }
1724        iop = InstObjParams(name, Name,
1725                            "RegRegImmOp",
1726                            { "code": eWalkCode,
1727                              "r_count": rCount,
1728                              "predicate_test": predicateTest,
1729                              "op_class": opClass }, [])
1730        header_output += NeonRegRegImmOpDeclare.subst(iop)
1731        exec_output += NeonEqualRegExecute.subst(iop)
1732        for type in types:
1733            substDict = { "targs" : type,
1734                          "class_name" : Name }
1735            exec_output += NeonExecDeclare.subst(substDict)
1736
1737    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1738        global header_output, exec_output
1739        eWalkCode = simdEnabledCheckCode + '''
1740        RegVect srcReg1, destReg;
1741        '''
1742        for reg in range(rCount):
1743            eWalkCode += '''
1744                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1745                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1746            ''' % { "reg" : reg }
1747            if readDest:
1748                eWalkCode += '''
1749                ''' % { "reg" : reg }
1750        readDestCode = ''
1751        if readDest:
1752            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1753        eWalkCode += op
1754        for reg in range(rCount):
1755            eWalkCode += '''
1756            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1757            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1758            ''' % { "reg" : reg }
1759        iop = InstObjParams(name, Name,
1760                            "RegRegOp",
1761                            { "code": eWalkCode,
1762                              "r_count": rCount,
1763                              "predicate_test": predicateTest,
1764                              "op_class": opClass }, [])
1765        header_output += NeonRegRegOpDeclare.subst(iop)
1766        exec_output += NeonEqualRegExecute.subst(iop)
1767        for type in types:
1768            substDict = { "targs" : type,
1769                          "class_name" : Name }
1770            exec_output += NeonExecDeclare.subst(substDict)
1771
1772    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1773            readDest=False, toInt=False):
1774        global header_output, exec_output
1775        eWalkCode = simdEnabledCheckCode + '''
1776        typedef FloatReg FloatVect[rCount];
1777        FloatVect srcRegs1;
1778        '''
1779        if toInt:
1780            eWalkCode += 'RegVect destRegs;\n'
1781        else:
1782            eWalkCode += 'FloatVect destRegs;\n'
1783        for reg in range(rCount):
1784            eWalkCode += '''
1785                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1786            ''' % { "reg" : reg }
1787            if readDest:
1788                if toInt:
1789                    eWalkCode += '''
1790                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1791                    ''' % { "reg" : reg }
1792                else:
1793                    eWalkCode += '''
1794                        destRegs[%(reg)d] = FpDestP%(reg)d;
1795                    ''' % { "reg" : reg }
1796        readDestCode = ''
1797        if readDest:
1798            readDestCode = 'destReg = destRegs[i];'
1799        destType = 'FloatReg'
1800        writeDest = 'destRegs[r] = destReg;'
1801        if toInt:
1802            destType = 'FloatRegBits'
1803            writeDest = 'destRegs.regs[r] = destReg;'
1804        eWalkCode += '''
1805        for (unsigned r = 0; r < rCount; r++) {
1806            FloatReg srcReg1 = srcRegs1[r];
1807            %(destType)s destReg;
1808            %(readDest)s
1809            %(op)s
1810            %(writeDest)s
1811        }
1812        ''' % { "op" : op,
1813                "readDest" : readDestCode,
1814                "destType" : destType,
1815                "writeDest" : writeDest }
1816        for reg in range(rCount):
1817            if toInt:
1818                eWalkCode += '''
1819                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1820                ''' % { "reg" : reg }
1821            else:
1822                eWalkCode += '''
1823                FpDestP%(reg)d = destRegs[%(reg)d];
1824                ''' % { "reg" : reg }
1825        iop = InstObjParams(name, Name,
1826                            "FpRegRegOp",
1827                            { "code": eWalkCode,
1828                              "r_count": rCount,
1829                              "predicate_test": predicateTest,
1830                              "op_class": opClass }, [])
1831        header_output += NeonRegRegOpDeclare.subst(iop)
1832        exec_output += NeonEqualRegExecute.subst(iop)
1833        for type in types:
1834            substDict = { "targs" : type,
1835                          "class_name" : Name }
1836            exec_output += NeonExecDeclare.subst(substDict)
1837
1838    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1839        global header_output, exec_output
1840        eWalkCode = simdEnabledCheckCode + '''
1841        RegVect srcRegs;
1842        BigRegVect destReg;
1843        '''
1844        for reg in range(rCount):
1845            eWalkCode += '''
1846                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1847            ''' % { "reg" : reg }
1848            if readDest:
1849                eWalkCode += '''
1850                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1851                ''' % { "reg" : reg }
1852        readDestCode = ''
1853        if readDest:
1854            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1855        eWalkCode += '''
1856        for (unsigned i = 0; i < eCount / 2; i++) {
1857            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1858            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1859            BigElement destElem;
1860            %(readDest)s
1861            %(op)s
1862            destReg.elements[i] = htog(destElem);
1863        }
1864        ''' % { "op" : op, "readDest" : readDestCode }
1865        for reg in range(rCount):
1866            eWalkCode += '''
1867            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1868            ''' % { "reg" : reg }
1869        iop = InstObjParams(name, Name,
1870                            "RegRegOp",
1871                            { "code": eWalkCode,
1872                              "r_count": rCount,
1873                              "predicate_test": predicateTest,
1874                              "op_class": opClass }, [])
1875        header_output += NeonRegRegOpDeclare.subst(iop)
1876        exec_output += NeonUnequalRegExecute.subst(iop)
1877        for type in types:
1878            substDict = { "targs" : type,
1879                          "class_name" : Name }
1880            exec_output += NeonExecDeclare.subst(substDict)
1881
1882    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1883        global header_output, exec_output
1884        eWalkCode = simdEnabledCheckCode + '''
1885        BigRegVect srcReg1;
1886        RegVect destReg;
1887        '''
1888        for reg in range(4):
1889            eWalkCode += '''
1890                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1891            ''' % { "reg" : reg }
1892        if readDest:
1893            for reg in range(2):
1894                eWalkCode += '''
1895                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1896                ''' % { "reg" : reg }
1897        readDestCode = ''
1898        if readDest:
1899            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1900        eWalkCode += '''
1901        for (unsigned i = 0; i < eCount; i++) {
1902            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1903            Element destElem;
1904            %(readDest)s
1905            %(op)s
1906            destReg.elements[i] = htog(destElem);
1907        }
1908        ''' % { "op" : op, "readDest" : readDestCode }
1909        for reg in range(2):
1910            eWalkCode += '''
1911            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1912            ''' % { "reg" : reg }
1913        iop = InstObjParams(name, Name,
1914                            "RegRegOp",
1915                            { "code": eWalkCode,
1916                              "r_count": 2,
1917                              "predicate_test": predicateTest,
1918                              "op_class": opClass }, [])
1919        header_output += NeonRegRegOpDeclare.subst(iop)
1920        exec_output += NeonUnequalRegExecute.subst(iop)
1921        for type in types:
1922            substDict = { "targs" : type,
1923                          "class_name" : Name }
1924            exec_output += NeonExecDeclare.subst(substDict)
1925
1926    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1927        global header_output, exec_output
1928        eWalkCode = simdEnabledCheckCode + '''
1929        RegVect destReg;
1930        '''
1931        if readDest:
1932            for reg in range(rCount):
1933                eWalkCode += '''
1934                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1935                ''' % { "reg" : reg }
1936        readDestCode = ''
1937        if readDest:
1938            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1939        eWalkCode += '''
1940        for (unsigned i = 0; i < eCount; i++) {
1941            Element destElem;
1942            %(readDest)s
1943            %(op)s
1944            destReg.elements[i] = htog(destElem);
1945        }
1946        ''' % { "op" : op, "readDest" : readDestCode }
1947        for reg in range(rCount):
1948            eWalkCode += '''
1949            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1950            ''' % { "reg" : reg }
1951        iop = InstObjParams(name, Name,
1952                            "RegImmOp",
1953                            { "code": eWalkCode,
1954                              "r_count": rCount,
1955                              "predicate_test": predicateTest,
1956                              "op_class": opClass }, [])
1957        header_output += NeonRegImmOpDeclare.subst(iop)
1958        exec_output += NeonEqualRegExecute.subst(iop)
1959        for type in types:
1960            substDict = { "targs" : type,
1961                          "class_name" : Name }
1962            exec_output += NeonExecDeclare.subst(substDict)
1963
1964    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1965        global header_output, exec_output
1966        eWalkCode = simdEnabledCheckCode + '''
1967        RegVect srcReg1;
1968        BigRegVect destReg;
1969        '''
1970        for reg in range(2):
1971            eWalkCode += '''
1972                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1973            ''' % { "reg" : reg }
1974        if readDest:
1975            for reg in range(4):
1976                eWalkCode += '''
1977                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1978                ''' % { "reg" : reg }
1979        readDestCode = ''
1980        if readDest:
1981            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1982        eWalkCode += '''
1983        for (unsigned i = 0; i < eCount; i++) {
1984            Element srcElem1 = gtoh(srcReg1.elements[i]);
1985            BigElement destElem;
1986            %(readDest)s
1987            %(op)s
1988            destReg.elements[i] = htog(destElem);
1989        }
1990        ''' % { "op" : op, "readDest" : readDestCode }
1991        for reg in range(4):
1992            eWalkCode += '''
1993            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1994            ''' % { "reg" : reg }
1995        iop = InstObjParams(name, Name,
1996                            "RegRegOp",
1997                            { "code": eWalkCode,
1998                              "r_count": 2,
1999                              "predicate_test": predicateTest,
2000                              "op_class": opClass }, [])
2001        header_output += NeonRegRegOpDeclare.subst(iop)
2002        exec_output += NeonUnequalRegExecute.subst(iop)
2003        for type in types:
2004            substDict = { "targs" : type,
2005                          "class_name" : Name }
2006            exec_output += NeonExecDeclare.subst(substDict)
2007
2008    vhaddCode = '''
2009        Element carryBit =
2010            (((unsigned)srcElem1 & 0x1) +
2011             ((unsigned)srcElem2 & 0x1)) >> 1;
2012        // Use division instead of a shift to ensure the sign extension works
2013        // right. The compiler will figure out if it can be a shift. Mask the
2014        // inputs so they get truncated correctly.
2015        destElem = (((srcElem1 & ~(Element)1) / 2) +
2016                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2017    '''
2018    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2019    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2020
2021    vrhaddCode = '''
2022        Element carryBit =
2023            (((unsigned)srcElem1 & 0x1) +
2024             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2025        // Use division instead of a shift to ensure the sign extension works
2026        // right. The compiler will figure out if it can be a shift. Mask the
2027        // inputs so they get truncated correctly.
2028        destElem = (((srcElem1 & ~(Element)1) / 2) +
2029                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2030    '''
2031    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2032    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2033
2034    vhsubCode = '''
2035        Element barrowBit =
2036            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2037        // Use division instead of a shift to ensure the sign extension works
2038        // right. The compiler will figure out if it can be a shift. Mask the
2039        // inputs so they get truncated correctly.
2040        destElem = (((srcElem1 & ~(Element)1) / 2) -
2041                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2042    '''
2043    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2044    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2045
2046    vandCode = '''
2047        destElem = srcElem1 & srcElem2;
2048    '''
2049    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2050    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2051
2052    vbicCode = '''
2053        destElem = srcElem1 & ~srcElem2;
2054    '''
2055    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2056    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2057
2058    vorrCode = '''
2059        destElem = srcElem1 | srcElem2;
2060    '''
2061    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2062    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2063
2064    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2065    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2066
2067    vornCode = '''
2068        destElem = srcElem1 | ~srcElem2;
2069    '''
2070    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2071    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2072
2073    veorCode = '''
2074        destElem = srcElem1 ^ srcElem2;
2075    '''
2076    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2077    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2078
2079    vbifCode = '''
2080        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2081    '''
2082    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2083    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
2084    vbitCode = '''
2085        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2086    '''
2087    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2088    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
2089    vbslCode = '''
2090        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2091    '''
2092    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2093    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2094
2095    vmaxCode = '''
2096        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2097    '''
2098    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2099    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2100
2101    vminCode = '''
2102        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2103    '''
2104    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2105    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
2107    vaddCode = '''
2108        destElem = srcElem1 + srcElem2;
2109    '''
2110    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2111    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2112
2113    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2114                      2, vaddCode, pairwise=True)
2115    vaddlwCode = '''
2116        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2117    '''
2118    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2119    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
2120    vaddhnCode = '''
2121        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2122                   (sizeof(Element) * 8);
2123    '''
2124    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
2125    vraddhnCode = '''
2126        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2127                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2128                   (sizeof(Element) * 8);
2129    '''
2130    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
2132    vsubCode = '''
2133        destElem = srcElem1 - srcElem2;
2134    '''
2135    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2136    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
2137    vsublwCode = '''
2138        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2139    '''
2140    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2141    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
2143    vqaddUCode = '''
2144        destElem = srcElem1 + srcElem2;
2145        FPSCR fpscr = (FPSCR) FpscrQc;
2146        if (destElem < srcElem1 || destElem < srcElem2) {
2147            destElem = (Element)(-1);
2148            fpscr.qc = 1;
2149        }
2150        FpscrQc = fpscr;
2151    '''
2152    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2153    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
2154    vsubhnCode = '''
2155        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2156                   (sizeof(Element) * 8);
2157    '''
2158    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
2159    vrsubhnCode = '''
2160        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2161                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2162                   (sizeof(Element) * 8);
2163    '''
2164    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
2166    vqaddSCode = '''
2167        destElem = srcElem1 + srcElem2;
2168        FPSCR fpscr = (FPSCR) FpscrQc;
2169        bool negDest = (destElem < 0);
2170        bool negSrc1 = (srcElem1 < 0);
2171        bool negSrc2 = (srcElem2 < 0);
2172        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2173            if (negDest)
2174                /* If (>=0) plus (>=0) yields (<0), saturate to +. */
2175                destElem = std::numeric_limits<Element>::max();
2176            else
2177                /* If (<0) plus (<0) yields (>=0), saturate to -. */
2178                destElem = std::numeric_limits<Element>::min();
2179            fpscr.qc = 1;
2180        }
2181        FpscrQc = fpscr;
2182    '''
2183    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2184    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2185
2186    vqsubUCode = '''
2187        destElem = srcElem1 - srcElem2;
2188        FPSCR fpscr = (FPSCR) FpscrQc;
2189        if (destElem > srcElem1) {
2190            destElem = 0;
2191            fpscr.qc = 1;
2192        }
2193        FpscrQc = fpscr;
2194    '''
2195    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2196    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2197
2198    vqsubSCode = '''
2199        destElem = srcElem1 - srcElem2;
2200        FPSCR fpscr = (FPSCR) FpscrQc;
2201        bool negDest = (destElem < 0);
2202        bool negSrc1 = (srcElem1 < 0);
2203        bool posSrc2 = (srcElem2 >= 0);
2204        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2205            if (negDest)
2206                /* If (>=0) minus (<0) yields (<0), saturate to +. */
2207                destElem = std::numeric_limits<Element>::max();
2208            else
2209                /* If (<0) minus (>=0) yields (>=0), saturate to -. */
2210                destElem = std::numeric_limits<Element>::min();
2211            fpscr.qc = 1;
2212        }
2213        FpscrQc = fpscr;
2214    '''
2215    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2216    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2217
2218    vcgtCode = '''
2219        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2220    '''
2221    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2222    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2223
2224    vcgeCode = '''
2225        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2226    '''
2227    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2228    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2229
2230    vceqCode = '''
2231        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2232    '''
2233    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2234    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2235
2236    vshlCode = '''
2237        int16_t shiftAmt = (int8_t)srcElem2;
2238        if (shiftAmt < 0) {
2239            shiftAmt = -shiftAmt;
2240            if (shiftAmt >= sizeof(Element) * 8) {
2241                shiftAmt = sizeof(Element) * 8 - 1;
2242                destElem = 0;
2243            } else {
2244                destElem = (srcElem1 >> shiftAmt);
2245            }
2246            // Make sure the right shift sign extended when it should.
2247            if (ltz(srcElem1) && !ltz(destElem)) {
2248                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2249                                             1 - shiftAmt));
2250            }
2251        } else {
2252            if (shiftAmt >= sizeof(Element) * 8) {
2253                destElem = 0;
2254            } else {
2255                destElem = srcElem1 << shiftAmt;
2256            }
2257        }
2258    '''
2259    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2260    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2261
2262    vrshlCode = '''
2263        int16_t shiftAmt = (int8_t)srcElem2;
2264        if (shiftAmt < 0) {
2265            shiftAmt = -shiftAmt;
2266            Element rBit = 0;
2267            if (shiftAmt <= sizeof(Element) * 8)
2268                rBit = bits(srcElem1, shiftAmt - 1);
2269            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2270                rBit = 1;
2271            if (shiftAmt >= sizeof(Element) * 8) {
2272                shiftAmt = sizeof(Element) * 8 - 1;
2273                destElem = 0;
2274            } else {
2275                destElem = (srcElem1 >> shiftAmt);
2276            }
2277            // Make sure the right shift sign extended when it should.
2278            if (ltz(srcElem1) && !ltz(destElem)) {
2279                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2280                                             1 - shiftAmt));
2281            }
2282            destElem += rBit;
2283        } else if (shiftAmt > 0) {
2284            if (shiftAmt >= sizeof(Element) * 8) {
2285                destElem = 0;
2286            } else {
2287                destElem = srcElem1 << shiftAmt;
2288            }
2289        } else {
2290            destElem = srcElem1;
2291        }
2292    '''
2293    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2294    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2295
# Per-element C++ body for the unsigned register-controlled saturating
# shift ("vqshl"). The shift amount is the low byte of srcElem2 read as a
# signed value: a negative amount shifts srcElem1 right (giving 0 once
# the amount reaches the element width), a positive amount shifts left,
# and zero copies srcElem1. A left shift that would drop set bits yields
# an all-ones element instead and sets fpscr.qc, which is written back
# through FpscrQc.
# NOTE(review): the 2 / 4 argument presumably selects the D / Q register
# form -- confirm against threeEqualRegInst.
2296    vqshlUCode = '''
2297        int16_t shiftAmt = (int8_t)srcElem2;
2298        FPSCR fpscr = (FPSCR) FpscrQc;
2299        if (shiftAmt < 0) {
2300            shiftAmt = -shiftAmt;
2301            if (shiftAmt >= sizeof(Element) * 8) {
2302                shiftAmt = sizeof(Element) * 8 - 1;
2303                destElem = 0;
2304            } else {
2305                destElem = (srcElem1 >> shiftAmt);
2306            }
2307        } else if (shiftAmt > 0) {
2308            if (shiftAmt >= sizeof(Element) * 8) {
2309                if (srcElem1 != 0) {
2310                    destElem = mask(sizeof(Element) * 8);
2311                    fpscr.qc = 1;
2312                } else {
2313                    destElem = 0;
2314                }
2315            } else {
2316                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2317                            sizeof(Element) * 8 - shiftAmt)) {
2318                    destElem = mask(sizeof(Element) * 8);
2319                    fpscr.qc = 1;
2320                } else {
2321                    destElem = srcElem1 << shiftAmt;
2322                }
2323            }
2324        } else {
2325            destElem = srcElem1;
2326        }
2327        FpscrQc = fpscr;
2328    '''
2329    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2330    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2331
# Per-element C++ body for the signed register-controlled saturating
# shift ("vqshl"). The shift amount is the low byte of srcElem2 read as a
# signed value. Negative amounts shift right; the result is then patched
# with sign bits if srcElem1 was negative but the shifted value is not
# (this also turns the "amount >= width" case into -1 for negative
# inputs). Positive amounts shift left; if the bits shifted out together
# with the new sign bit are not a pure sign extension, the result
# saturates to the largest positive or most negative element value
# (chosen by the sign of srcElem1) and fpscr.qc is set.
2332    vqshlSCode = '''
2333        int16_t shiftAmt = (int8_t)srcElem2;
2334        FPSCR fpscr = (FPSCR) FpscrQc;
2335        if (shiftAmt < 0) {
2336            shiftAmt = -shiftAmt;
2337            if (shiftAmt >= sizeof(Element) * 8) {
2338                shiftAmt = sizeof(Element) * 8 - 1;
2339                destElem = 0;
2340            } else {
2341                destElem = (srcElem1 >> shiftAmt);
2342            }
2343            // Make sure the right shift sign extended when it should.
2344            if (srcElem1 < 0 && destElem >= 0) {
2345                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2346                                             1 - shiftAmt));
2347            }
2348        } else if (shiftAmt > 0) {
2349            bool sat = false;
2350            if (shiftAmt >= sizeof(Element) * 8) {
2351                if (srcElem1 != 0)
2352                    sat = true;
2353                else
2354                    destElem = 0;
2355            } else {
2356                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2357                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2358                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2359                    sat = true;
2360                } else {
2361                    destElem = srcElem1 << shiftAmt;
2362                }
2363            }
2364            if (sat) {
2365                fpscr.qc = 1;
2366                destElem = mask(sizeof(Element) * 8 - 1);
2367                if (srcElem1 < 0)
2368                    destElem = ~destElem;
2369            }
2370        } else {
2371            destElem = srcElem1;
2372        }
2373        FpscrQc = fpscr;
2374    '''
2375    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2376    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2377
2378    vqrshlUCode = '''
2379        int16_t shiftAmt = (int8_t)srcElem2;
2380        FPSCR fpscr = (FPSCR) FpscrQc;
2381        if (shiftAmt < 0) {
2382            shiftAmt = -shiftAmt;
2383            Element rBit = 0;
2384            if (shiftAmt <= sizeof(Element) * 8)
2385                rBit = bits(srcElem1, shiftAmt - 1);
2386            if (shiftAmt >= sizeof(Element) * 8) {
2387                shiftAmt = sizeof(Element) * 8 - 1;
2388                destElem = 0;
2389            } else {
2390                destElem = (srcElem1 >> shiftAmt);
2391            }
2392            destElem += rBit;
2393        } else {
2394            if (shiftAmt >= sizeof(Element) * 8) {
2395                if (srcElem1 != 0) {
2396                    destElem = mask(sizeof(Element) * 8);
2397                    fpscr.qc = 1;
2398                } else {
2399                    destElem = 0;
2400                }
2401            } else {
2402                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2403                            sizeof(Element) * 8 - shiftAmt)) {
2404                    destElem = mask(sizeof(Element) * 8);
2405                    fpscr.qc = 1;
2406                } else {
2407                    destElem = srcElem1 << shiftAmt;
2408                }
2409            }
2410        }
2411        FpscrQc = fpscr;
2412    '''
2413    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2414    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2415
# Per-element C++ body for the signed register-controlled saturating
# rounding shift ("vqrshl"). The shift amount is the low byte of srcElem2
# read as a signed value. Negative amounts shift right: the rounding
# increment rBit is the last bit shifted out, or 1 when the amount
# exceeds the element width and srcElem1 is negative; the shifted value
# is patched with sign bits when needed and rBit is then added. Positive
# amounts shift left and saturate to the largest positive / most negative
# element value (by the sign of srcElem1), setting fpscr.qc, when the
# discarded bits are not a pure sign extension. Zero copies srcElem1.
2416    vqrshlSCode = '''
2417        int16_t shiftAmt = (int8_t)srcElem2;
2418        FPSCR fpscr = (FPSCR) FpscrQc;
2419        if (shiftAmt < 0) {
2420            shiftAmt = -shiftAmt;
2421            Element rBit = 0;
2422            if (shiftAmt <= sizeof(Element) * 8)
2423                rBit = bits(srcElem1, shiftAmt - 1);
2424            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2425                rBit = 1;
2426            if (shiftAmt >= sizeof(Element) * 8) {
2427                shiftAmt = sizeof(Element) * 8 - 1;
2428                destElem = 0;
2429            } else {
2430                destElem = (srcElem1 >> shiftAmt);
2431            }
2432            // Make sure the right shift sign extended when it should.
2433            if (srcElem1 < 0 && destElem >= 0) {
2434                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2435                                             1 - shiftAmt));
2436            }
2437            destElem += rBit;
2438        } else if (shiftAmt > 0) {
2439            bool sat = false;
2440            if (shiftAmt >= sizeof(Element) * 8) {
2441                if (srcElem1 != 0)
2442                    sat = true;
2443                else
2444                    destElem = 0;
2445            } else {
2446                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2447                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2448                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2449                    sat = true;
2450                } else {
2451                    destElem = srcElem1 << shiftAmt;
2452                }
2453            }
2454            if (sat) {
2455                fpscr.qc = 1;
2456                destElem = mask(sizeof(Element) * 8 - 1);
2457                if (srcElem1 < 0)
2458                    destElem = ~destElem;
2459            }
2460        } else {
2461            destElem = srcElem1;
2462        }
2463        FpscrQc = fpscr;
2464    '''
2465    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2466    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2467
# Absolute-difference-and-accumulate: destElem += |srcElem1 - srcElem2|.
# The difference is always formed as larger-minus-smaller. The trailing
# True argument is presumably the "destination is also read" flag --
# confirm against threeEqualRegInst.
2468    vabaCode = '''
2469        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2470                                            (srcElem2 - srcElem1);
2471    '''
2472    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2473    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# Long variant: both operands are cast to BigElement before subtracting,
# so the difference is computed at the wider destination type.
2474    vabalCode = '''
2475        destElem += (srcElem1 > srcElem2) ?
2476            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2477            ((BigElement)srcElem2 - (BigElement)srcElem1);
2478    '''
2479    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2480
# Absolute difference: destElem = |srcElem1 - srcElem2|, formed as
# larger-minus-smaller.
2481    vabdCode = '''
2482        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2483                                           (srcElem2 - srcElem1);
2484    '''
2485    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2486    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# Long variant: operands are cast to BigElement before subtracting.
2487    vabdlCode = '''
2488        destElem = (srcElem1 > srcElem2) ?
2489            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2490            ((BigElement)srcElem2 - (BigElement)srcElem1);
2491    '''
2492    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2493
# Bit test: the destination element becomes all ones when srcElem1 and
# srcElem2 share at least one set bit, and zero otherwise.
2494    vtstCode = '''
2495        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2496    '''
2497    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2498    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2499
# Integer multiply: element-wise product at the element's own width.
2500    vmulCode = '''
2501        destElem = srcElem1 * srcElem2;
2502    '''
2503    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2504    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# Long multiply: both operands are widened to BigElement first, so the
# product is formed at the wider destination type.
2505    vmullCode = '''
2506        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2507    '''
2508    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2509
# Integer multiply-accumulate: destElem += srcElem1 * srcElem2. The
# snippet reads destElem, and the instantiations pass a trailing True.
2510    vmlaCode = '''
2511        destElem = destElem + srcElem1 * srcElem2;
2512    '''
2513    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2514    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# Long multiply-accumulate: the product is formed at BigElement width.
2515    vmlalCode = '''
2516        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2517    '''
2518    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2519
2520    vqdmlalCode = '''
2521        FPSCR fpscr = (FPSCR) FpscrQc;
2522        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2523        Element maxNeg = std::numeric_limits<Element>::min();
2524        Element halfNeg = maxNeg / 2;
2525        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2526            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2527            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2528            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2529            fpscr.qc = 1;
2530        }
2531        bool negPreDest = ltz(destElem);
2532        destElem += midElem;
2533        bool negDest = ltz(destElem);
2534        bool negMid = ltz(midElem);
2535        if (negPreDest == negMid && negMid != negDest) {
2536            destElem = mask(sizeof(BigElement) * 8 - 1);
2537            if (negPreDest)
2538                destElem = ~destElem;
2539            fpscr.qc = 1;
2540        }
2541        FpscrQc = fpscr;
2542    '''
2543    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2544
2545    vqdmlslCode = '''
2546        FPSCR fpscr = (FPSCR) FpscrQc;
2547        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2548        Element maxNeg = std::numeric_limits<Element>::min();
2549        Element halfNeg = maxNeg / 2;
2550        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2551            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2552            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2553            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2554            fpscr.qc = 1;
2555        }
2556        bool negPreDest = ltz(destElem);
2557        destElem -= midElem;
2558        bool negDest = ltz(destElem);
2559        bool posMid = ltz((BigElement)-midElem);
2560        if (negPreDest == posMid && posMid != negDest) {
2561            destElem = mask(sizeof(BigElement) * 8 - 1);
2562            if (negPreDest)
2563                destElem = ~destElem;
2564            fpscr.qc = 1;
2565        }
2566        FpscrQc = fpscr;
2567    '''
2568    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2569
# Saturating doubling multiply long: destElem = 2 * srcElem1 * srcElem2
# computed through int64_t. When both operands equal the most negative
# Element value the result is replaced by the bitwise complement of that
# value shifted up by one element width, and fpscr.qc is set.
2570    vqdmullCode = '''
2571        FPSCR fpscr = (FPSCR) FpscrQc;
2572        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2573        if (srcElem1 == srcElem2 &&
2574                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2575            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2576            fpscr.qc = 1;
2577        }
2578        FpscrQc = fpscr;
2579    '''
2580    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2581
# Integer multiply-subtract: destElem -= srcElem1 * srcElem2.
2582    vmlsCode = '''
2583        destElem = destElem - srcElem1 * srcElem2;
2584    '''
2585    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2586    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# Long multiply-subtract: the product is formed at BigElement width.
2587    vmlslCode = '''
2588        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2589    '''
2590    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2591
# Carry-less (XOR-accumulating) multiply: for every set bit j of
# srcElem2, srcElem1 shifted left by j is XORed into the result. The
# result is kept at Element width.
2592    vmulpCode = '''
2593        destElem = 0;
2594        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2595            if (bits(srcElem2, j))
2596                destElem ^= srcElem1 << j;
2597        }
2598    '''
2599    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2600    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form of the same carry-less multiply: srcElem1 is widened to
# BigElement before shifting, so no partial product bits are dropped.
2601    vmullpCode = '''
2602        destElem = 0;
2603        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2604            if (bits(srcElem2, j))
2605                destElem ^= (BigElement)srcElem1 << j;
2606        }
2607    '''
2608    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2609
# Pairwise integer max / min. These reuse vmaxCode and vminCode, which
# are defined outside this section, and pass pairwise=True.
2610    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2611
2612    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2613
# Saturating doubling multiply returning the high half: the doubled
# int64_t product is shifted right by one element width. When both
# operands equal the most negative Element value the result is replaced
# by ~srcElem1 (the largest positive value) and fpscr.qc is set.
2614    vqdmulhCode = '''
2615        FPSCR fpscr = (FPSCR) FpscrQc;
2616        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2617                   (sizeof(Element) * 8);
2618        if (srcElem1 == srcElem2 &&
2619                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2620            destElem = ~srcElem1;
2621            fpscr.qc = 1;
2622        }
2623        FpscrQc = fpscr;
2624    '''
2625    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2626    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2627
2628    vqrdmulhCode = '''
2629        FPSCR fpscr = (FPSCR) FpscrQc;
2630        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2631                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2632                   (sizeof(Element) * 8);
2633        Element maxNeg = std::numeric_limits<Element>::min();
2634        Element halfNeg = maxNeg / 2;
2635        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2636            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2637            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2638            if (destElem < 0) {
2639                destElem = mask(sizeof(Element) * 8 - 1);
2640            } else {
2641                destElem = std::numeric_limits<Element>::min();
2642            }
2643            fpscr.qc = 1;
2644        }
2645        FpscrQc = fpscr;
2646    '''
2647    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2648            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2649    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2650            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2651
# Floating-point max. processNans() gets first go at producing the
# result and reports through `done` whether it did; if not, the result
# comes from binaryOp() with fpMax<float>. When processNans() did produce
# the result and flushToZero() reports true for the operands, fpscr.idc
# is set. The FPSCR copy is written back through FpscrExc.
# NOTE(review): the two `true` arguments to binaryOp are presumably the
# flush-to-zero / default-NaN controls -- confirm against its declaration.
2652    vmaxfpCode = '''
2653        FPSCR fpscr = (FPSCR) FpscrExc;
2654        bool done;
2655        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2656        if (!done) {
2657            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2658                               true, true, VfpRoundNearest);
2659        } else if (flushToZero(srcReg1, srcReg2)) {
2660            fpscr.idc = 1;
2661        }
2662        FpscrExc = fpscr;
2663    '''
2664    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2665    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2666
# Floating-point min. Same structure as the max snippet: processNans()
# may produce the result (reported through `done`); otherwise binaryOp()
# with fpMin<float> does. When processNans() produced the result and
# flushToZero() reports true for the operands, fpscr.idc is set.
2667    vminfpCode = '''
2668        FPSCR fpscr = (FPSCR) FpscrExc;
2669        bool done;
2670        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2671        if (!done) {
2672            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2673                               true, true, VfpRoundNearest);
2674        } else if (flushToZero(srcReg1, srcReg2)) {
2675            fpscr.idc = 1;
2676        }
2677        FpscrExc = fpscr;
2678    '''
2679    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2680    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2681
# Pairwise floating-point max / min: the vmaxfpCode and vminfpCode
# snippets are reused with pairwise=True.
2682    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2683                        2, vmaxfpCode, pairwise=True)
2684    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2685                        4, vmaxfpCode, pairwise=True)
2686
2687    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2688                        2, vminfpCode, pairwise=True)
2689    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2690                        4, vminfpCode, pairwise=True)
2691
# Floating-point add: srcReg1 and srcReg2 go through binaryOp() with
# fpAddS and VfpRoundNearest; the FPSCR copy that binaryOp() may update
# is written back through FpscrExc.
2692    vaddfpCode = '''
2693        FPSCR fpscr = (FPSCR) FpscrExc;
2694        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2695                           true, true, VfpRoundNearest);
2696        FpscrExc = fpscr;
2697    '''
2698    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2699    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2700
# Pairwise floating-point add: the vaddfpCode snippet is reused with
# pairwise=True.
2701    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2702                        2, vaddfpCode, pairwise=True)
2703    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2704                        4, vaddfpCode, pairwise=True)
2705
# Floating-point subtract: binaryOp() with fpSubS on (srcReg1, srcReg2)
# and VfpRoundNearest; the FPSCR copy is written back through FpscrExc.
2706    vsubfpCode = '''
2707        FPSCR fpscr = (FPSCR) FpscrExc;
2708        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2709                           true, true, VfpRoundNearest);
2710        FpscrExc = fpscr;
2711    '''
2712    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2713    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2714
# Floating-point multiply: binaryOp() with fpMulS on (srcReg1, srcReg2)
# and VfpRoundNearest; the FPSCR copy is written back through FpscrExc.
2715    vmulfpCode = '''
2716        FPSCR fpscr = (FPSCR) FpscrExc;
2717        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2718                           true, true, VfpRoundNearest);
2719        FpscrExc = fpscr;
2720    '''
2721    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2722    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2723
# Floating-point multiply-accumulate done as two separate binaryOp()
# calls: the product is first produced as the float `mid`, and that
# value is then added to destReg. Both steps share the same FPSCR copy.
2724    vmlafpCode = '''
2725        FPSCR fpscr = (FPSCR) FpscrExc;
2726        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2727                             true, true, VfpRoundNearest);
2728        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2729                           true, true, VfpRoundNearest);
2730        FpscrExc = fpscr;
2731    '''
2732    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2733    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2734
# Floating-point multiply-add issued as a single ternaryOp() call with
# fpMulAdd<float> on (srcReg1, srcReg2, destReg), in contrast to the
# two-step vmla snippet.
2735    vfmafpCode = '''
2736        FPSCR fpscr = (FPSCR) FpscrExc;
2737        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2738                            true, true, VfpRoundNearest);
2739        FpscrExc = fpscr;
2740    '''
2741    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2742    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2743
# Floating-point multiply-subtract issued as a single ternaryOp() call:
# identical to the vfma snippet except that the first multiplicand is
# negated (-srcReg1) before the call.
2744    vfmsfpCode = '''
2745        FPSCR fpscr = (FPSCR) FpscrExc;
2746        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2747                            true, true, VfpRoundNearest);
2748        FpscrExc = fpscr;
2749    '''
2750    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2751    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2752
# Floating-point multiply-subtract done as two separate binaryOp()
# calls: the product is produced as the float `mid`, and then
# destReg - mid is computed with fpSubS.
2753    vmlsfpCode = '''
2754        FPSCR fpscr = (FPSCR) FpscrExc;
2755        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2756                             true, true, VfpRoundNearest);
2757        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2758                           true, true, VfpRoundNearest);
2759        FpscrExc = fpscr;
2760    '''
2761    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2762    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2763
# Floating-point compare (vcgt). vcgtFunc is run through binaryOp() and
# its float result is used as a code: 0 produces -1 in destReg, any other
# value produces 0, and 2.0 additionally sets fpscr.ioc.
# NOTE(review): 2.0 presumably flags a NaN operand -- confirm against
# vcgtFunc. The instantiations pass toInt = True.
2764    vcgtfpCode = '''
2765        FPSCR fpscr = (FPSCR) FpscrExc;
2766        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2767                             true, true, VfpRoundNearest);
2768        destReg = (res == 0) ? -1 : 0;
2769        if (res == 2.0)
2770            fpscr.ioc = 1;
2771        FpscrExc = fpscr;
2772    '''
2773    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2774            2, vcgtfpCode, toInt = True)
2775    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2776            4, vcgtfpCode, toInt = True)
2777
# Floating-point compare (vcge). vcgeFunc is run through binaryOp() and
# its float result is used as a code: 0 produces -1 in destReg, any other
# value produces 0, and 2.0 additionally sets fpscr.ioc.
# NOTE(review): 2.0 presumably flags a NaN operand -- confirm against
# vcgeFunc.
2778    vcgefpCode = '''
2779        FPSCR fpscr = (FPSCR) FpscrExc;
2780        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2781                             true, true, VfpRoundNearest);
2782        destReg = (res == 0) ? -1 : 0;
2783        if (res == 2.0)
2784            fpscr.ioc = 1;
2785        FpscrExc = fpscr;
2786    '''
2787    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2788            2, vcgefpCode, toInt = True)
2789    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2790            4, vcgefpCode, toInt = True)
2791
# Floating-point compare (vacgt). vacgtFunc is run through binaryOp() and
# its float result is used as a code: 0 produces -1 in destReg, any other
# value produces 0, and 2.0 additionally sets fpscr.ioc.
# NOTE(review): presumably vacgtFunc compares magnitudes and 2.0 flags a
# NaN operand -- confirm against its definition.
2792    vacgtfpCode = '''
2793        FPSCR fpscr = (FPSCR) FpscrExc;
2794        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2795                             true, true, VfpRoundNearest);
2796        destReg = (res == 0) ? -1 : 0;
2797        if (res == 2.0)
2798            fpscr.ioc = 1;
2799        FpscrExc = fpscr;
2800    '''
2801    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2802            2, vacgtfpCode, toInt = True)
2803    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2804            4, vacgtfpCode, toInt = True)
2805
# Floating-point compare (vacge). vacgeFunc is run through binaryOp() and
# its float result is used as a code: 0 produces -1 in destReg, any other
# value produces 0, and 2.0 additionally sets fpscr.ioc.
# NOTE(review): presumably vacgeFunc compares magnitudes and 2.0 flags a
# NaN operand -- confirm against its definition.
2806    vacgefpCode = '''
2807        FPSCR fpscr = (FPSCR) FpscrExc;
2808        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2809                             true, true, VfpRoundNearest);
2810        destReg = (res == 0) ? -1 : 0;
2811        if (res == 2.0)
2812            fpscr.ioc = 1;
2813        FpscrExc = fpscr;
2814    '''
2815    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2816            2, vacgefpCode, toInt = True)
2817    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2818            4, vacgefpCode, toInt = True)
2819
# Floating-point compare (vceq). vceqFunc is run through binaryOp() and
# its float result is used as a code: 0 produces -1 in destReg, any other
# value produces 0, and 2.0 additionally sets fpscr.ioc.
# NOTE(review): 2.0 presumably flags a NaN operand -- confirm against
# vceqFunc.
2820    vceqfpCode = '''
2821        FPSCR fpscr = (FPSCR) FpscrExc;
2822        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2823                             true, true, VfpRoundNearest);
2824        destReg = (res == 0) ? -1 : 0;
2825        if (res == 2.0)
2826            fpscr.ioc = 1;
2827        FpscrExc = fpscr;
2828    '''
2829    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2830            2, vceqfpCode, toInt = True)
2831    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2832            4, vceqfpCode, toInt = True)
2833
# vrecps: the whole computation is delegated to fpRecpsS via binaryOp()
# on (srcReg1, srcReg2); the FPSCR copy is written back through FpscrExc.
2834    vrecpsCode = '''
2835        FPSCR fpscr = (FPSCR) FpscrExc;
2836        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2837                           true, true, VfpRoundNearest);
2838        FpscrExc = fpscr;
2839    '''
2840    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2841    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2842
# vrsqrts: the whole computation is delegated to fpRSqrtsS via binaryOp()
# on (srcReg1, srcReg2); the FPSCR copy is written back through FpscrExc.
2843    vrsqrtsCode = '''
2844        FPSCR fpscr = (FPSCR) FpscrExc;
2845        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2846                           true, true, VfpRoundNearest);
2847        FpscrExc = fpscr;
2848    '''
2849    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2850    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2851
# Floating-point absolute difference: srcReg1 - srcReg2 is computed with
# binaryOp()/fpSubS, and fabs() of that intermediate is the result.
2852    vabdfpCode = '''
2853        FPSCR fpscr = (FPSCR) FpscrExc;
2854        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2855                             true, true, VfpRoundNearest);
2856        destReg = fabs(mid);
2857        FpscrExc = fpscr;
2858    '''
2859    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2860    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2861
# Additional vmla / vmlal instantiations through the twoEqualRegInst /
# twoRegLongInst helper family, reusing the snippets defined above.
# NOTE(review): presumably these are the by-scalar encodings -- confirm
# against the helper definitions. The integer forms here use
# unsignedTypes, whereas the vmls and vmul groups below use allTypes.
2862    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2863    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2864    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2865    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2866    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2867
# Additional vmls / vmlsl instantiations through the twoEqualRegInst /
# twoRegLongInst helper family, reusing the snippets defined above.
# NOTE(review): presumably the by-scalar encodings -- confirm.
2868    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2869    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2870    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2871    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2872    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2873
# Additional vmul / vmull instantiations through the twoEqualRegInst /
# twoRegLongInst helper family, reusing the snippets defined above.
# NOTE(review): presumably the by-scalar encodings -- confirm.
2874    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2875    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2876    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2877    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2878    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2879
# Additional instantiations of the saturating doubling multiply family
# (vqdmull, vqdmlal, vqdmlsl, vqdmulh, vqrdmulh) through the
# twoRegLongInst / twoEqualRegInst helpers, reusing the snippets defined
# above. NOTE(review): presumably the by-scalar encodings -- confirm.
2880    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2881    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2882    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2883    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2884    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2885    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2886            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2887    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2888            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2889
# Shift right by the immediate `imm`. When imm is at least the element
# width the result is the sign fill: -1 if ltz(srcElem1), otherwise 0.
# Smaller amounts use a plain >>.
2890    vshrCode = '''
2891        if (imm >= sizeof(srcElem1) * 8) {
2892            if (ltz(srcElem1))
2893                destElem = -1;
2894            else
2895                destElem = 0;
2896        } else {
2897            destElem = srcElem1 >> imm;
2898        }
2899    '''
2900    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2901    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2902
# Shift right by the immediate `imm` and accumulate into destElem. The
# shifted value `mid` is the sign fill (-1 or 0) when imm is at least the
# element width; otherwise it is srcElem1 >> imm, patched with sign bits
# if srcElem1 is negative but the shifted value is not.
# NOTE(review): "Element mid;;" in the snippet carries a redundant empty
# statement; it is harmless.
2903    vsraCode = '''
2904        Element mid;;
2905        if (imm >= sizeof(srcElem1) * 8) {
2906            mid = ltz(srcElem1) ? -1 : 0;
2907        } else {
2908            mid = srcElem1 >> imm;
2909            if (ltz(srcElem1) && !ltz(mid)) {
2910                mid |= -(mid & ((Element)1 <<
2911                            (sizeof(Element) * 8 - 1 - imm)));
2912            }
2913        }
2914        destElem += mid;
2915    '''
2916    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2917    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2918
# Rounding shift right by the immediate `imm`. The rounding increment is
# bit (imm - 1) of srcElem1, i.e. the last bit shifted out. The shift is
# split into (imm - 1) and 1 so that imm equal to the element width never
# shifts by the full width in one step. imm above the element width gives
# 0, and imm == 0 copies the source.
2919    vrshrCode = '''
2920        if (imm > sizeof(srcElem1) * 8) {
2921            destElem = 0;
2922        } else if (imm) {
2923            Element rBit = bits(srcElem1, imm - 1);
2924            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2925        } else {
2926            destElem = srcElem1;
2927        }
2928    '''
2929    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2930    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2931
# Rounding shift right by the immediate `imm` and accumulate: the same
# rounded shift as the vrshr snippet, but the value is added to destElem
# instead of replacing it (adding 0 when imm exceeds the element width).
2932    vrsraCode = '''
2933        if (imm > sizeof(srcElem1) * 8) {
2934            destElem += 0;
2935        } else if (imm) {
2936            Element rBit = bits(srcElem1, imm - 1);
2937            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2938        } else {
2939            destElem += srcElem1;
2940        }
2941    '''
2942    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2943    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2944
# Shift right and insert: srcElem1 >> imm is ORed with the destination
# bits selected by ~mask(width - imm), so the destination bits above the
# inserted field are kept. With imm at least the element width the
# destination is left unchanged.
2945    vsriCode = '''
2946        if (imm >= sizeof(Element) * 8) {
2947            destElem = destElem;
2948        } else {
2949            destElem = (srcElem1 >> imm) |
2950                (destElem & ~mask(sizeof(Element) * 8 - imm));
2951        }
2952    '''
2953    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2954    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2955
# Shift left by the immediate `imm`. For imm at least the element width
# the shift is performed as (width - 1) followed by 1, so no single
# shift uses the full element width.
2956    vshlCode = '''
2957        if (imm >= sizeof(Element) * 8) {
2958            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2959        } else {
2960            destElem = srcElem1 << imm;
2961        }
2962    '''
2963    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2964    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2965
# Shift left and insert: srcElem1 << imm is ORed with the low imm bits of
# the existing destination (destElem & mask(imm)). With imm at least the
# element width the destination is left unchanged.
2966    vsliCode = '''
2967        if (imm >= sizeof(Element) * 8) {
2968            destElem = destElem;
2969        } else {
2970            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2971        }
2972    '''
2973    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2974    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2975
# Signed saturating shift left by the immediate `imm`. The top imm + 1
# bits of the source must be all zeros or all ones for the shifted value
# to be kept; otherwise the result saturates to the most negative Element
# value, or its complement (the largest positive value) when srcElem1 is
# positive, and fpscr.qc is set. imm at least the element width saturates
# any non-zero source; imm == 0 copies the source.
2976    vqshlCode = '''
2977        FPSCR fpscr = (FPSCR) FpscrQc;
2978        if (imm >= sizeof(Element) * 8) {
2979            if (srcElem1 != 0) {
2980                destElem = std::numeric_limits<Element>::min();
2981                if (srcElem1 > 0)
2982                    destElem = ~destElem;
2983                fpscr.qc = 1;
2984            } else {
2985                destElem = 0;
2986            }
2987        } else if (imm) {
2988            destElem = (srcElem1 << imm);
2989            uint64_t topBits = bits((uint64_t)srcElem1,
2990                                    sizeof(Element) * 8 - 1,
2991                                    sizeof(Element) * 8 - 1 - imm);
2992            if (topBits != 0 && topBits != mask(imm + 1)) {
2993                destElem = std::numeric_limits<Element>::min();
2994                if (srcElem1 > 0)
2995                    destElem = ~destElem;
2996                fpscr.qc = 1;
2997            }
2998        } else {
2999            destElem = srcElem1;
3000        }
3001        FpscrQc = fpscr;
3002    '''
3003    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3004    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3005
# Saturating shift left by the immediate `imm`, instantiated for
# unsignedTypes. If any of the top imm bits of the source is set the
# result becomes an all-ones element and fpscr.qc is set; otherwise the
# shifted value is kept. imm at least the element width saturates any
# non-zero source; imm == 0 copies the source.
3006    vqshluCode = '''
3007        FPSCR fpscr = (FPSCR) FpscrQc;
3008        if (imm >= sizeof(Element) * 8) {
3009            if (srcElem1 != 0) {
3010                destElem = mask(sizeof(Element) * 8);
3011                fpscr.qc = 1;
3012            } else {
3013                destElem = 0;
3014            }
3015        } else if (imm) {
3016            destElem = (srcElem1 << imm);
3017            uint64_t topBits = bits((uint64_t)srcElem1,
3018                                    sizeof(Element) * 8 - 1,
3019                                    sizeof(Element) * 8 - imm);
3020            if (topBits != 0) {
3021                destElem = mask(sizeof(Element) * 8);
3022                fpscr.qc = 1;
3023            }
3024        } else {
3025            destElem = srcElem1;
3026        }
3027        FpscrQc = fpscr;
3028    '''
3029    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3030    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3031
# Saturating shift left by the immediate `imm`, instantiated for
# signedTypes but saturating to an unsigned-style range. A negative
# source always yields 0 with fpscr.qc set, even when imm == 0. A
# positive source whose top imm bits are not all clear yields the value
# mask(element width) with fpscr.qc set. Otherwise the shifted value
# (or the source itself for imm == 0) is kept.
3032    vqshlusCode = '''
3033        FPSCR fpscr = (FPSCR) FpscrQc;
3034        if (imm >= sizeof(Element) * 8) {
3035            if (srcElem1 < 0) {
3036                destElem = 0;
3037                fpscr.qc = 1;
3038            } else if (srcElem1 > 0) {
3039                destElem = mask(sizeof(Element) * 8);
3040                fpscr.qc = 1;
3041            } else {
3042                destElem = 0;
3043            }
3044        } else if (imm) {
3045            destElem = (srcElem1 << imm);
3046            uint64_t topBits = bits((uint64_t)srcElem1,
3047                                    sizeof(Element) * 8 - 1,
3048                                    sizeof(Element) * 8 - imm);
3049            if (srcElem1 < 0) {
3050                destElem = 0;
3051                fpscr.qc = 1;
3052            } else if (topBits != 0) {
3053                destElem = mask(sizeof(Element) * 8);
3054                fpscr.qc = 1;
3055            }
3056        } else {
3057            if (srcElem1 < 0) {
3058                fpscr.qc = 1;
3059                destElem = 0;
3060            } else {
3061                destElem = srcElem1;
3062            }
3063        }
3064        FpscrQc = fpscr;
3065    '''
3066    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3067    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3068
        # vshrn: right shift of the source by imm, registered through the
        # narrowing-shift helper. The result is 0 when imm is at least the
        # source width in bits.
3069    vshrnCode = '''
3070        if (imm >= sizeof(srcElem1) * 8) {
3071            destElem = 0;
3072        } else {
3073            destElem = srcElem1 >> imm;
3074        }
3075    '''
3076    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3077
        # vrshrn: rounding form of the shift above. Bit (imm - 1) of the
        # source is added to the shifted value. The shift is split into
        # (imm - 1) followed by 1, so imm equal to the source width is still
        # handled by the shifting branch.
3078    vrshrnCode = '''
3079        if (imm > sizeof(srcElem1) * 8) {
3080            destElem = 0;
3081        } else if (imm) {
3082            Element rBit = bits(srcElem1, imm - 1);
3083            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3084        } else {
3085            destElem = srcElem1;
3086        }
3087    '''
3088    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3089
        # vqshrn over smallSignedTypes: right shift by imm followed by a
        # saturating narrow. "mid" is the shifted value with its sign bit
        # re-extended; if it does not survive a round trip through Element the
        # result is mask(element bits - 1), complemented for a negative
        # source, and fpscr.qc is set. imm == 0 copies the source without any
        # saturation check.
3090    vqshrnCode = '''
3091        FPSCR fpscr = (FPSCR) FpscrQc;
3092        if (imm > sizeof(srcElem1) * 8) {
3093            if (srcElem1 != 0 && srcElem1 != -1)
3094                fpscr.qc = 1;
3095            destElem = 0;
3096        } else if (imm) {
3097            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3098            mid |= -(mid & ((BigElement)1 <<
3099                        (sizeof(BigElement) * 8 - 1 - imm)));
3100            if (mid != (Element)mid) {
3101                destElem = mask(sizeof(Element) * 8 - 1);
3102                if (srcElem1 < 0)
3103                    destElem = ~destElem;
3104                fpscr.qc = 1;
3105            } else {
3106                destElem = mid;
3107            }
3108        } else {
3109            destElem = srcElem1;
3110        }
3111        FpscrQc = fpscr;
3112    '''
3113    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3114
        # vqshrun over smallUnsignedTypes: right shift by imm followed by a
        # saturating narrow. A shifted value that does not fit Element
        # becomes mask(element bits) with fpscr.qc set. imm == 0 copies the
        # source without a saturation check.
3115    vqshrunCode = '''
3116        FPSCR fpscr = (FPSCR) FpscrQc;
3117        if (imm > sizeof(srcElem1) * 8) {
3118            if (srcElem1 != 0)
3119                fpscr.qc = 1;
3120            destElem = 0;
3121        } else if (imm) {
3122            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3123            if (mid != (Element)mid) {
3124                destElem = mask(sizeof(Element) * 8);
3125                fpscr.qc = 1;
3126            } else {
3127                destElem = mid;
3128            }
3129        } else {
3130            destElem = srcElem1;
3131        }
3132        FpscrQc = fpscr;
3133    '''
3134    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3135                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3136
        # Second "vqshrun" registration, this time over smallSignedTypes
        # (class NVqshruns). After the shift, any bit set above the Element
        # width triggers saturation: 0 for a negative source,
        # mask(element bits) otherwise, with fpscr.qc set in both cases.
        # imm == 0 copies the source without a saturation check.
3137    vqshrunsCode = '''
3138        FPSCR fpscr = (FPSCR) FpscrQc;
3139        if (imm > sizeof(srcElem1) * 8) {
3140            if (srcElem1 != 0)
3141                fpscr.qc = 1;
3142            destElem = 0;
3143        } else if (imm) {
3144            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3145            if (bits(mid, sizeof(BigElement) * 8 - 1,
3146                          sizeof(Element) * 8) != 0) {
3147                if (srcElem1 < 0) {
3148                    destElem = 0;
3149                } else {
3150                    destElem = mask(sizeof(Element) * 8);
3151                }
3152                fpscr.qc = 1;
3153            } else {
3154                destElem = mid;
3155            }
3156        } else {
3157            destElem = srcElem1;
3158        }
3159        FpscrQc = fpscr;
3160    '''
3161    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3162                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3163
        # vqrshrn over smallSignedTypes: rounding, saturating narrowing right
        # shift. rBit is the last bit shifted out; it is added after the sign
        # bit has been re-extended. If the rounded value does not fit
        # Element, the result is mask(element bits - 1), complemented for a
        # negative source, and fpscr.qc is set. Unlike vqshrnCode, the
        # imm == 0 branch applies the same saturation to the source.
3164    vqrshrnCode = '''
3165        FPSCR fpscr = (FPSCR) FpscrQc;
3166        if (imm > sizeof(srcElem1) * 8) {
3167            if (srcElem1 != 0 && srcElem1 != -1)
3168                fpscr.qc = 1;
3169            destElem = 0;
3170        } else if (imm) {
3171            BigElement mid = (srcElem1 >> (imm - 1));
3172            uint64_t rBit = mid & 0x1;
3173            mid >>= 1;
3174            mid |= -(mid & ((BigElement)1 <<
3175                        (sizeof(BigElement) * 8 - 1 - imm)));
3176            mid += rBit;
3177            if (mid != (Element)mid) {
3178                destElem = mask(sizeof(Element) * 8 - 1);
3179                if (srcElem1 < 0)
3180                    destElem = ~destElem;
3181                fpscr.qc = 1;
3182            } else {
3183                destElem = mid;
3184            }
3185        } else {
3186            if (srcElem1 != (Element)srcElem1) {
3187                destElem = mask(sizeof(Element) * 8 - 1);
3188                if (srcElem1 < 0)
3189                    destElem = ~destElem;
3190                fpscr.qc = 1;
3191            } else {
3192                destElem = srcElem1;
3193            }
3194        }
3195        FpscrQc = fpscr;
3196    '''
3197    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3198                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3199
3200    vqrshrunCode = '''
3201        FPSCR fpscr = (FPSCR) FpscrQc;
3202        if (imm > sizeof(srcElem1) * 8) {
3203            if (srcElem1 != 0)
3204                fpscr.qc = 1;
3205            destElem = 0;
3206        } else if (imm) {
3207            BigElement mid = (srcElem1 >> (imm - 1));
3208            uint64_t rBit = mid & 0x1;
3209            mid >>= 1;
3210            mid += rBit;
3211            if (mid != (Element)mid) {
3212                destElem = mask(sizeof(Element) * 8);
3213                fpscr.qc = 1;
3214            } else {
3215                destElem = mid;
3216            }
3217        } else {
3218            if (srcElem1 != (Element)srcElem1) {
3219                destElem = mask(sizeof(Element) * 8 - 1);
3220                fpscr.qc = 1;
3221            } else {
3222                destElem = srcElem1;
3223            }
3224        }
3225        FpscrQc = fpscr;
3226    '''
3227    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3228                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3229
3230    vqrshrunsCode = '''
3231        FPSCR fpscr = (FPSCR) FpscrQc;
3232        if (imm > sizeof(srcElem1) * 8) {
3233            if (srcElem1 != 0)
3234                fpscr.qc = 1;
3235            destElem = 0;
3236        } else if (imm) {
3237            BigElement mid = (srcElem1 >> (imm - 1));
3238            uint64_t rBit = mid & 0x1;
3239            mid >>= 1;
3240            mid |= -(mid & ((BigElement)1 <<
3241                            (sizeof(BigElement) * 8 - 1 - imm)));
3242            mid += rBit;
3243            if (bits(mid, sizeof(BigElement) * 8 - 1,
3244                          sizeof(Element) * 8) != 0) {
3245                if (srcElem1 < 0) {
3246                    destElem = 0;
3247                } else {
3248                    destElem = mask(sizeof(Element) * 8);
3249                }
3250                fpscr.qc = 1;
3251            } else {
3252                destElem = mid;
3253            }
3254        } else {
3255            if (srcElem1 < 0) {
3256                fpscr.qc = 1;
3257                destElem = 0;
3258            } else {
3259                destElem = srcElem1;
3260            }
3261        }
3262        FpscrQc = fpscr;
3263    '''
3264    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3265                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3266
        # vshll: the source is cast to BigElement before shifting left by
        # imm, so bits shifted past the source width are kept. The result is
        # 0 when imm is at least the destination width in bits.
3267    vshllCode = '''
3268        if (imm >= sizeof(destElem) * 8) {
3269            destElem = 0;
3270        } else {
3271            destElem = (BigElement)srcElem1 << imm;
3272        }
3273    '''
3274    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3275
        # vmovl: plain element copy, registered through the same long-shift
        # helper as vshll but with op class SimdMiscOp.
3276    vmovlCode = '''
3277        destElem = srcElem1;
3278    '''
3279    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3280
        # vcvt float -> fixed point ("ufx" variant). Sets fpscr.idc when
        # flushToZero(srcElem1) reports true, then converts with
        # vfpFpToFixed<float>(src, false, 32, imm) between prepFpState() and
        # finishVfp(). The second argument is false here and true in the
        # "sfx" variant below (presumably the signedness flag; confirm
        # against vfpFpToFixed).
        # NOTE(review): the empty asm statements with "m" constraints look
        # like compiler barriers that keep the conversion inside the
        # prepFpState/finishVfp window; confirm.
3281    vcvt2ufxCode = '''
3282        FPSCR fpscr = (FPSCR) FpscrExc;
3283        if (flushToZero(srcElem1))
3284            fpscr.idc = 1;
3285        VfpSavedState state = prepFpState(VfpRoundNearest);
3286        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3287        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3288        __asm__ __volatile__("" :: "m" (destReg));
3289        finishVfp(fpscr, state, true);
3290        FpscrExc = fpscr;
3291    '''
3292    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3293            2, vcvt2ufxCode, toInt = True)
3294    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3295            4, vcvt2ufxCode, toInt = True)
3296
        # vcvt float -> fixed point ("sfx" variant): identical to the block
        # above except that vfpFpToFixed receives true as its second argument.
3297    vcvt2sfxCode = '''
3298        FPSCR fpscr = (FPSCR) FpscrExc;
3299        if (flushToZero(srcElem1))
3300            fpscr.idc = 1;
3301        VfpSavedState state = prepFpState(VfpRoundNearest);
3302        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3303        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3304        __asm__ __volatile__("" :: "m" (destReg));
3305        finishVfp(fpscr, state, true);
3306        FpscrExc = fpscr;
3307    '''
3308    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3309            2, vcvt2sfxCode, toInt = True)
3310    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3311            4, vcvt2sfxCode, toInt = True)
3312
        # vcvt fixed point -> float ("u" variant): converts srcReg1 with
        # vfpUFixedToFpS(true, true, srcReg1, 32, imm) between prepFpState()
        # and finishVfp(), then writes the accumulated flags back to FpscrExc.
        # Registered with fromInt = True.
3313    vcvtu2fpCode = '''
3314        FPSCR fpscr = (FPSCR) FpscrExc;
3315        VfpSavedState state = prepFpState(VfpRoundNearest);
3316        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3317        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3318        __asm__ __volatile__("" :: "m" (destElem));
3319        finishVfp(fpscr, state, true);
3320        FpscrExc = fpscr;
3321    '''
3322    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3323            2, vcvtu2fpCode, fromInt = True)
3324    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3325            4, vcvtu2fpCode, fromInt = True)
3326
        # vcvt fixed point -> float ("s" variant): same sequence as above but
        # converting with vfpSFixedToFpS.
3327    vcvts2fpCode = '''
3328        FPSCR fpscr = (FPSCR) FpscrExc;
3329        VfpSavedState state = prepFpState(VfpRoundNearest);
3330        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3331        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3332        __asm__ __volatile__("" :: "m" (destElem));
3333        finishVfp(fpscr, state, true);
3334        FpscrExc = fpscr;
3335    '''
3336    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3337            2, vcvts2fpCode, fromInt = True)
3338    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3339            4, vcvts2fpCode, fromInt = True)
3340
        # vcvt through the narrowing helper (class NVcvts2h, element type
        # uint16_t): the source bits are reinterpreted as a float with
        # bitsToFp, fpscr.idc is set when flushToZero() reports true, and the
        # value is converted with vcvtFpSFpH using VfpRoundNearest and
        # fpscr.ahp.
3341    vcvts2hCode = '''
3342        destElem = 0;
3343        FPSCR fpscr = (FPSCR) FpscrExc;
3344        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3345        if (flushToZero(srcFp1))
3346            fpscr.idc = 1;
3347        VfpSavedState state = prepFpState(VfpRoundNearest);
3348        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3349                                : "m" (srcFp1), "m" (destElem));
3350        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3351                              fpscr.ahp, srcFp1);
3352        __asm__ __volatile__("" :: "m" (destElem));
3353        finishVfp(fpscr, state, true);
3354        FpscrExc = fpscr;
3355    '''
3356    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3357
        # vcvt through the long helper (class NVcvth2s): converts srcElem1
        # with vcvtFpHFpS, again honouring fpscr.ahp, and stores the raw bits
        # of the returned value via fpToBits.
3358    vcvth2sCode = '''
3359        destElem = 0;
3360        FPSCR fpscr = (FPSCR) FpscrExc;
3361        VfpSavedState state = prepFpState(VfpRoundNearest);
3362        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3363                                : "m" (srcElem1), "m" (destElem));
3364        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3365        __asm__ __volatile__("" :: "m" (destElem));
3366        finishVfp(fpscr, state, true);
3367        FpscrExc = fpscr;
3368    '''
3369    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3370
        # vrsqrte, integer form (uint32_t elements): delegates to
        # unsignedRSqrtEstimate().
3371    vrsqrteCode = '''
3372        destElem = unsignedRSqrtEstimate(srcElem1);
3373    '''
3374    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3375    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3376
        # vrsqrte, float form: sets fpscr.idc when flushToZero(srcReg1)
        # reports true, then delegates to fprSqrtEstimate(), which also
        # receives fpscr.
3377    vrsqrtefpCode = '''
3378        FPSCR fpscr = (FPSCR) FpscrExc;
3379        if (flushToZero(srcReg1))
3380            fpscr.idc = 1;
3381        destReg = fprSqrtEstimate(fpscr, srcReg1);
3382        FpscrExc = fpscr;
3383    '''
3384    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3385    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3386
        # vrecpe, integer form (uint32_t elements): delegates to
        # unsignedRecipEstimate().
3387    vrecpeCode = '''
3388        destElem = unsignedRecipEstimate(srcElem1);
3389    '''
3390    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3391    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3392
        # vrecpe, float form: sets fpscr.idc when flushToZero(srcReg1)
        # reports true, then delegates to fpRecipEstimate().
3393    vrecpefpCode = '''
3394        FPSCR fpscr = (FPSCR) FpscrExc;
3395        if (flushToZero(srcReg1))
3396            fpscr.idc = 1;
3397        destReg = fpRecipEstimate(fpscr, srcReg1);
3398        FpscrExc = fpscr;
3399    '''
3400    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3401    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3402
        # vrev16/vrev32/vrev64: copy the element and remap its index.
        # groupSize is the number of elements in a 2-, 4- or 8-byte group
        # ((1 << n) / sizeof(Element)); XOR-ing i with (groupSize - 1)
        # mirrors the index within its group.
        # NOTE(review): j is presumably the destination element index used by
        # the surrounding twoRegMiscInst template; it is not declared here.
3403    vrev16Code = '''
3404        destElem = srcElem1;
3405        unsigned groupSize = ((1 << 1) / sizeof(Element));
3406        unsigned reverseMask = (groupSize - 1);
3407        j = i ^ reverseMask;
3408    '''
3409    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3410    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3411    vrev32Code = '''
3412        destElem = srcElem1;
3413        unsigned groupSize = ((1 << 2) / sizeof(Element));
3414        unsigned reverseMask = (groupSize - 1);
3415        j = i ^ reverseMask;
3416    '''
3417    twoRegMiscInst("vrev32", "NVrev32D",
3418            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3419    twoRegMiscInst("vrev32", "NVrev32Q",
3420            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3421    vrev64Code = '''
3422        destElem = srcElem1;
3423        unsigned groupSize = ((1 << 3) / sizeof(Element));
3424        unsigned reverseMask = (groupSize - 1);
3425        j = i ^ reverseMask;
3426    '''
3427    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3428    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3429
        # Calls split() for the 'exec' section and then appends vcompares and
        # vcomparesL (both defined outside this part of the file) to
        # exec_output.
        # NOTE(review): split() presumably starts a new generated exec
        # output chunk at this point; confirm against the ISA parser.
3430    split('exec')
3431    exec_output += vcompares + vcomparesL
3432
        # vpaddl: both source elements are cast to BigElement before being
        # added, so the sum cannot wrap at the source element width.
3433    vpaddlCode = '''
3434        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3435    '''
3436    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3437    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3438
        # vpadal: same widened sum, accumulated into the existing destElem.
        # The extra trailing True is only passed for this accumulating form
        # (presumably "read the destination first"; confirm against
        # twoRegCondenseInst).
3439    vpadalCode = '''
3440        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3441    '''
3442    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3443    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3444
        # vcls over signedTypes: counts how many bits directly below the top
        # bit match it. The value is shifted left once to drop the sign bit,
        # then shifted repeatedly while the new top bit still matches the
        # original sign; the count is capped at element bits - 1.
3445    vclsCode = '''
3446        unsigned count = 0;
3447        if (srcElem1 < 0) {
3448            srcElem1 <<= 1;
3449            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3450                count++;
3451                srcElem1 <<= 1;
3452            }
3453        } else {
3454            srcElem1 <<= 1;
3455            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3456                count++;
3457                srcElem1 <<= 1;
3458            }
3459        }
3460        destElem = count;
3461    '''
3462    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3463    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3464
        # vclz: counts leading zero bits. It is instantiated over signedTypes
        # so that "srcElem1 >= 0" tests the top bit; the value is shifted
        # left until the top bit is set or all element bits were examined.
3465    vclzCode = '''
3466        unsigned count = 0;
3467        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3468            count++;
3469            srcElem1 <<= 1;
3470        }
3471        destElem = count;
3472    '''
3473    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3474    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3475
        # vcnt over unsignedTypes: counts the set bits by adding the low bit
        # and shifting right until the value is zero or all element bits
        # were examined.
3476    vcntCode = '''
3477        unsigned count = 0;
3478        while (srcElem1 && count < sizeof(Element) * 8) {
3479            count += srcElem1 & 0x1;
3480            srcElem1 >>= 1;
3481        }
3482        destElem = count;
3483    '''
3484
3485    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3486    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3487
        # vmvn, register form: bitwise complement, instantiated only for
        # uint64_t elements. The name vmvnCode is rebound further down in
        # this file for the immediate form.
3488    vmvnCode = '''
3489        destElem = ~srcElem1;
3490    '''
3491    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3492    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3493
        # vqabs over signedTypes: absolute value with saturation. The minimum
        # representable value has no positive counterpart, so it maps to its
        # bitwise complement and fpscr.qc is set.
3494    vqabsCode = '''
3495        FPSCR fpscr = (FPSCR) FpscrQc;
3496        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3497            fpscr.qc = 1;
3498            destElem = ~srcElem1;
3499        } else if (srcElem1 < 0) {
3500            destElem = -srcElem1;
3501        } else {
3502            destElem = srcElem1;
3503        }
3504        FpscrQc = fpscr;
3505    '''
3506    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3507    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3508
        # vqneg over signedTypes: negation with saturation. The minimum
        # representable value maps to its bitwise complement and sets
        # fpscr.qc; every other value is simply negated.
3509    vqnegCode = '''
3510        FPSCR fpscr = (FPSCR) FpscrQc;
3511        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3512            fpscr.qc = 1;
3513            destElem = ~srcElem1;
3514        } else {
3515            destElem = -srcElem1;
3516        }
3517        FpscrQc = fpscr;
3518    '''
3519    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3520    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3521
        # vabs, integer form over signedTypes: plain (non-saturating)
        # absolute value.
3522    vabsCode = '''
3523        if (srcElem1 < 0) {
3524            destElem = -srcElem1;
3525        } else {
3526            destElem = srcElem1;
3527        }
3528    '''
3529
3530    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3531    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
        # vabs, float form: views the value as a uint32_t through a union
        # and clears the top bit by AND-ing with mask(element bits - 1).
3532    vabsfpCode = '''
3533        union
3534        {
3535            uint32_t i;
3536            float f;
3537        } cStruct;
3538        cStruct.f = srcReg1;
3539        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3540        destReg = cStruct.f;
3541    '''
3542    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3543    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3544
        # vneg, integer form over signedTypes: plain (non-saturating)
        # negation.
3545    vnegCode = '''
3546        destElem = -srcElem1;
3547    '''
3548    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3549    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
        # vneg, float form: negates the register value directly.
3550    vnegfpCode = '''
3551        destReg = -srcReg1;
3552    '''
3553    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3554    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3555
        # vcgt against zero. Integer form: mask(element bits) when
        # srcElem1 > 0, otherwise 0.
3556    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3557    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3558    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
        # Float form: evaluates vcgtFunc on (srcReg1, 0.0) through binaryOp.
        # A result of 0 writes -1, any other result writes 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
        # NOTE(review): the 0 / 2.0 encoding is defined by vcgtFunc, which is
        # outside this part of the file.
3559    vcgtfpCode = '''
3560        FPSCR fpscr = (FPSCR) FpscrExc;
3561        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3562                             true, true, VfpRoundNearest);
3563        destReg = (res == 0) ? -1 : 0;
3564        if (res == 2.0)
3565            fpscr.ioc = 1;
3566        FpscrExc = fpscr;
3567    '''
3568    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3569            2, vcgtfpCode, toInt = True)
3570    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3571            4, vcgtfpCode, toInt = True)
3572
        # vcge against zero. Integer form: mask(element bits) when
        # srcElem1 >= 0, otherwise 0.
3573    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3574    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3575    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
        # Float form: evaluates vcgeFunc on (srcReg1, 0.0) through binaryOp.
        # A result of 0 writes -1, any other result writes 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
3576    vcgefpCode = '''
3577        FPSCR fpscr = (FPSCR) FpscrExc;
3578        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3579                             true, true, VfpRoundNearest);
3580        destReg = (res == 0) ? -1 : 0;
3581        if (res == 2.0)
3582            fpscr.ioc = 1;
3583        FpscrExc = fpscr;
3584    '''
3585    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3586            2, vcgefpCode, toInt = True)
3587    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3588            4, vcgefpCode, toInt = True)
3589
        # vceq against zero. Integer form: mask(element bits) when
        # srcElem1 == 0, otherwise 0.
3590    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3591    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3592    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
        # Float form: evaluates vceqFunc on (srcReg1, 0.0) through binaryOp.
        # A result of 0 writes -1, any other result writes 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
3593    vceqfpCode = '''
3594        FPSCR fpscr = (FPSCR) FpscrExc;
3595        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3596                             true, true, VfpRoundNearest);
3597        destReg = (res == 0) ? -1 : 0;
3598        if (res == 2.0)
3599            fpscr.ioc = 1;
3600        FpscrExc = fpscr;
3601    '''
3602    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3603            2, vceqfpCode, toInt = True)
3604    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3605            4, vceqfpCode, toInt = True)
3606
        # vcle against zero. Integer form: mask(element bits) when
        # srcElem1 <= 0, otherwise 0.
3607    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3608    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3609    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
        # Float form: evaluates vcleFunc on (srcReg1, 0.0) through binaryOp.
        # A result of 0 writes -1, any other result writes 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
3610    vclefpCode = '''
3611        FPSCR fpscr = (FPSCR) FpscrExc;
3612        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3613                             true, true, VfpRoundNearest);
3614        destReg = (res == 0) ? -1 : 0;
3615        if (res == 2.0)
3616            fpscr.ioc = 1;
3617        FpscrExc = fpscr;
3618    '''
3619    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3620            2, vclefpCode, toInt = True)
3621    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3622            4, vclefpCode, toInt = True)
3623
        # vclt against zero. Integer form: mask(element bits) when
        # srcElem1 < 0, otherwise 0.
3624    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3625    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3626    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
        # Float form: evaluates vcltFunc on (srcReg1, 0.0) through binaryOp.
        # A result of 0 writes -1, any other result writes 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
3627    vcltfpCode = '''
3628        FPSCR fpscr = (FPSCR) FpscrExc;
3629        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3630                             true, true, VfpRoundNearest);
3631        destReg = (res == 0) ? -1 : 0;
3632        if (res == 2.0)
3633            fpscr.ioc = 1;
3634        FpscrExc = fpscr;
3635    '''
3636    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3637            2, vcltfpCode, toInt = True)
3638    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3639            4, vcltfpCode, toInt = True)
3640
        # vswp: exchanges srcReg1 and destReg one register word at a time,
        # using "mid" as the temporary. Both operands are modified.
3641    vswpCode = '''
3642        FloatRegBits mid;
3643        for (unsigned r = 0; r < rCount; r++) {
3644            mid = srcReg1.regs[r];
3645            srcReg1.regs[r] = destReg.regs[r];
3646            destReg.regs[r] = mid;
3647        }
3648    '''
3649    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3650    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3651
        # vtrn: for every even index i, exchanges srcReg1.elements[i] with
        # destReg.elements[i + 1]. All other elements stay in place.
3652    vtrnCode = '''
3653        Element mid;
3654        for (unsigned i = 0; i < eCount; i += 2) {
3655            mid = srcReg1.elements[i];
3656            srcReg1.elements[i] = destReg.elements[i + 1];
3657            destReg.elements[i + 1] = mid;
3658        }
3659    '''
3660    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3661            smallUnsignedTypes, 2, vtrnCode)
3662    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3663            smallUnsignedTypes, 4, vtrnCode)
3664
        # vuzp: de-interleaves the two operands. destReg ends up holding the
        # even-indexed elements of destReg followed by those of srcReg1;
        # srcReg1 ends up holding the odd-indexed elements of destReg
        # followed by its own. "mid" is a snapshot of the original srcReg1.
        # The first loop works in place on destReg: it only reads indices
        # >= i, which have not been overwritten yet.
3665    vuzpCode = '''
3666        Element mid[eCount];
3667        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3668        for (unsigned i = 0; i < eCount / 2; i++) {
3669            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3670            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3671            destReg.elements[i] = destReg.elements[2 * i];
3672        }
3673        for (unsigned i = 0; i < eCount / 2; i++) {
3674            destReg.elements[eCount / 2 + i] = mid[2 * i];
3675        }
3676    '''
3677    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3678    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3679
3680    vzipCode = '''
3681        Element mid[eCount];
3682        memcpy(&mid, &destReg, sizeof(destReg));
3683        for (unsigned i = 0; i < eCount / 2; i++) {
3684            destReg.elements[2 * i] = mid[i];
3685            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3686        }
3687        for (int i = 0; i < eCount / 2; i++) {
3688            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3689            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3690        }
3691    '''
3692    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3693    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3694
        # vmovn: plain element assignment registered through the narrowing
        # helper, so no saturation is applied.
3695    vmovnCode = 'destElem = srcElem1;'
3696    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3697
        # vdup, register form: the same plain assignment, registered through
        # twoRegMiscScInst.
3698    vdupCode = 'destElem = srcElem1;'
3699    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3700    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3701
3702    def vdupGprInst(name, Name, opClass, types, rCount):
3703        global header_output, exec_output
3704        eWalkCode = simdEnabledCheckCode + '''
3705        RegVect destReg;
3706        for (unsigned i = 0; i < eCount; i++) {
3707            destReg.elements[i] = htog((Element)Op1);
3708        }
3709        '''
3710        for reg in range(rCount):
3711            eWalkCode += '''
3712            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3713            ''' % { "reg" : reg }
3714        iop = InstObjParams(name, Name,
3715                            "RegRegOp",
3716                            { "code": eWalkCode,
3717                              "r_count": rCount,
3718                              "predicate_test": predicateTest,
3719                              "op_class": opClass }, [])
3720        header_output += NeonRegRegOpDeclare.subst(iop)
3721        exec_output += NeonEqualRegExecute.subst(iop)
3722        for type in types:
3723            substDict = { "targs" : type,
3724                          "class_name" : Name }
3725            exec_output += NeonExecDeclare.subst(substDict)
3726    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3727    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3728
        # Immediate forms, all instantiated for uint64_t elements only.
        # vmov writes imm to the destination.
3729    vmovCode = 'destElem = imm;'
3730    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3731    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3732
        # vorr ORs imm into the existing destination. The trailing True is
        # only passed for the two read-modify-write forms (vorr, vbic).
3733    vorrCode = 'destElem |= imm;'
3734    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3735    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3736
        # vmvn writes the complement of imm. This rebinds vmvnCode, which
        # held the register-form body earlier in the file.
3737    vmvnCode = 'destElem = ~imm;'
3738    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3739    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3740
        # vbic clears the bits of imm in the existing destination.
3741    vbicCode = 'destElem &= ~imm;'
3742    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3743    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3744
        # vqmovn over smallSignedTypes: saturating narrow. If the narrowed
        # value widened back to BigElement differs from the source, the
        # result is mask(element bits - 1), complemented for a negative
        # source, and fpscr.qc is set.
3745    vqmovnCode = '''
3746    FPSCR fpscr = (FPSCR) FpscrQc;
3747    destElem = srcElem1;
3748    if ((BigElement)destElem != srcElem1) {
3749        fpscr.qc = 1;
3750        destElem = mask(sizeof(Element) * 8 - 1);
3751        if (srcElem1 < 0)
3752            destElem = ~destElem;
3753    }
3754    FpscrQc = fpscr;
3755    '''
3756    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3757
        # vqmovun over smallUnsignedTypes: the same round-trip check, but
        # saturating to mask(element bits).
3758    vqmovunCode = '''
3759    FPSCR fpscr = (FPSCR) FpscrQc;
3760    destElem = srcElem1;
3761    if ((BigElement)destElem != srcElem1) {
3762        fpscr.qc = 1;
3763        destElem = mask(sizeof(Element) * 8);
3764    }
3765    FpscrQc = fpscr;
3766    '''
3767    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3768            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3769
        # Second "vqmovun" registration, over smallSignedTypes (class
        # NVqmovuns). A negative source, or one whose value does not fit in
        # the low element bits, saturates: mask(element bits) is written and
        # then complemented when the source is negative. fpscr.qc is set in
        # both cases.
3770    vqmovunsCode = '''
3771    FPSCR fpscr = (FPSCR) FpscrQc;
3772    destElem = srcElem1;
3773    if (srcElem1 < 0 ||
3774            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3775        fpscr.qc = 1;
3776        destElem = mask(sizeof(Element) * 8);
3777        if (srcElem1 < 0)
3778            destElem = ~destElem;
3779    }
3780    FpscrQc = fpscr;
3781    '''
3782    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3783            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3784
3785    def buildVext(name, Name, opClass, types, rCount, op):
            # Generate the declaration and execute code for one vext
            # instruction class and append it to the global output buffers.
            #
            #   name    -- mnemonic string passed to InstObjParams
            #   Name    -- class name passed to InstObjParams; also used as
            #              "class_name" for every NeonExecDeclare emitted
            #   opClass -- value substituted for the "op_class" field
            #   types   -- iterable of type strings; one NeonExecDeclare is
            #              emitted per entry through the "targs" field
            #   rCount  -- number of register words copied in for each source
            #              operand and copied out for the destination
            #   op      -- C++ snippet spliced between the load and the store
            #              code; it can use srcReg1, srcReg2 and destReg
            #
            # Side effects: appends to header_output and exec_output.
3786        global header_output, exec_output
3787        eWalkCode = simdEnabledCheckCode + '''
3788        RegVect srcReg1, srcReg2, destReg;
3789        '''
            # Load rCount words of each source operand (FpOp1P<n>_uw and
            # FpOp2P<n>_uw) into srcReg1 / srcReg2, passing each through htog.
3790        for reg in range(rCount):
3791            eWalkCode += '''
3792                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3793                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3794            ''' % { "reg" : reg }
            # Caller-supplied operation body.
3795        eWalkCode += op
            # Store rCount words of destReg back to FpDestP<n>_uw via gtoh.
3796        for reg in range(rCount):
3797            eWalkCode += '''
3798            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3799            ''' % { "reg" : reg }
            # "RegRegRegImmOp" is the third InstObjParams argument
            # (presumably the base class name -- confirm against the
            # InstObjParams definition).
3800        iop = InstObjParams(name, Name,
3801                            "RegRegRegImmOp",
3802                            { "code": eWalkCode,
3803                              "r_count": rCount,
3804                              "predicate_test": predicateTest,
3805                              "op_class": opClass }, [])
3806        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3807        exec_output += NeonEqualRegExecute.subst(iop)
            # One NeonExecDeclare substitution per requested element type.
3808        for type in types:
3809            substDict = { "targs" : type,
3810                          "class_name" : Name }
3811            exec_output += NeonExecDeclare.subst(substDict)
3812
3813    vextCode = '''
3814        for (unsigned i = 0; i < eCount; i++) {
3815            unsigned index = i + imm;
3816            if (index < eCount) {
3817                destReg.elements[i] = srcReg1.elements[index];
3818            } else {
3819                index -= eCount;
3820                if (index >= eCount) {
3821                    fault = std::make_shared<UndefinedInstruction>(machInst,
3822                                                                   false,
3823                                                                   mnemonic);
3824                } else {
3825                    destReg.elements[i] = srcReg2.elements[index];
3826                }
3827            }
3828        }
3829    '''
        # Operation body defined above: for each destination element i the
        # source index is i + imm.  Indices below eCount read srcReg1; indices
        # in [eCount, 2 * eCount) read srcReg2 at (index - eCount); anything
        # larger sets fault to an UndefinedInstruction and leaves that
        # destination element unwritten.
        #
        # Both instantiations use uint8_t elements; NVextD is built with
        # rCount 2 and NVextQ with rCount 4.
3830    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3831    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3832
3833    def buildVtbxl(name, Name, opClass, length, isVtbl):
            # Generate declaration, constructor and execute code for one
            # table-lookup instruction class (used below for vtbl and vtbx).
            #
            #   name    -- mnemonic string passed to InstObjParams
            #   Name    -- class name passed to InstObjParams
            #   opClass -- value substituted for the "op_class" field
            #   length  -- integer; the first 2 * length table words are
            #              loaded from FpOp1P<n>_uw and byte indices below
            #              8 * length are treated as in range
            #   isVtbl  -- string spliced in as the initialiser of the C++
            #              bool isVtbl ("true" / "false" at the call sites);
            #              when true an out-of-range index writes 0, otherwise
            #              the destination byte is left as loaded
            #
            # Side effects: appends to header_output, decoder_output and
            # exec_output.
3834        global header_output, decoder_output, exec_output
3835        code = simdEnabledCheckCode + '''
3836            union
3837            {
3838                uint8_t bytes[32];
3839                FloatRegBits regs[8];
3840            } table;
3841
3842            union
3843            {
3844                uint8_t bytes[8];
3845                FloatRegBits regs[2];
3846            } destReg, srcReg2;
3847
3848            const unsigned length = %(length)d;
3849            const bool isVtbl = %(isVtbl)s;
3850
3851            srcReg2.regs[0] = htog(FpOp2P0_uw);
3852            srcReg2.regs[1] = htog(FpOp2P1_uw);
3853
3854            destReg.regs[0] = htog(FpDestP0_uw);
3855            destReg.regs[1] = htog(FpDestP1_uw);
3856        ''' % { "length" : length, "isVtbl" : isVtbl }
            # Populate all 8 table words: words below 2 * length come from the
            # first operand's register words, the remainder are zeroed.
3857        for reg in range(8):
3858            if reg < length * 2:
3859                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3860                        { "reg" : reg }
3861            else:
3862                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
            # Per-byte lookup: each byte of srcReg2 is an index into
            # table.bytes; the loop covers sizeof(destReg) bytes and the two
            # destination words are written back afterwards.
3863        code += '''
3864        for (unsigned i = 0; i < sizeof(destReg); i++) {
3865            uint8_t index = srcReg2.bytes[i];
3866            if (index < 8 * length) {
3867                destReg.bytes[i] = table.bytes[index];
3868            } else {
3869                if (isVtbl)
3870                    destReg.bytes[i] = 0;
3871                // else destReg.bytes[i] unchanged
3872            }
3873        }
3874
3875        FpDestP0_uw = gtoh(destReg.regs[0]);
3876        FpDestP1_uw = gtoh(destReg.regs[1]);
3877        '''
            # "RegRegRegOp" is the third InstObjParams argument (presumably
            # the base class name -- confirm against InstObjParams).
3878        iop = InstObjParams(name, Name,
3879                            "RegRegRegOp",
3880                            { "code": code,
3881                              "predicate_test": predicateTest,
3882                              "op_class": opClass }, [])
3883        header_output += RegRegRegOpDeclare.subst(iop)
3884        decoder_output += RegRegRegOpConstructor.subst(iop)
3885        exec_output += PredOpExecute.subst(iop)
3886
3887    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3888    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3889    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3890    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3891
        # The four calls above build the vtbl classes for table lengths 1-4
        # with isVtbl "true" (out-of-range indices write 0).  The four calls
        # below build the matching vtbx classes with isVtbl "false", so
        # out-of-range indices leave the destination byte unchanged.
3892    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3893    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3894    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3895    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3896}};
3897