neon.isa revision 10197
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061let {{
    # Accumulators for the generated C++. The generator functions defined
    # in this block append their declaration templates to header_output and
    # their execute templates and instantiations to exec_output.
    header_output = ""
    exec_output = ""

    # C++ source for the greater-than, greater-or-equal and equal compare
    # helpers. Each returns 2.0 when its NaN test fires, 0.0 when the
    # comparison holds and 1.0 otherwise. vceqFunc tests its operands with
    # isSnan() where the other two use std::isnan().
    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    # C++ source for the less-or-equal and less-than compare helpers, using
    # the same 2.0 / 0.0 / 1.0 return convention.
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    # C++ source for the absolute-value compare helpers: both operands go
    # through fabsf() before the > or >= comparison. Same return convention.
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    # Only vcompares and vacomparesG are emitted by this statement;
    # vcomparesL is not appended here.
    exec_output += vcompares + vacomparesG
1126
1127    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1128    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1129    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1130    signedTypes = smallSignedTypes + ("int64_t",)
1131    smallTypes = smallUnsignedTypes + smallSignedTypes
1132    allTypes = unsignedTypes + signedTypes
1133
1134    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135                          readDest=False, pairwise=False):
1136        global header_output, exec_output
1137        eWalkCode = simdEnabledCheckCode + '''
1138        RegVect srcReg1, srcReg2, destReg;
1139        '''
1140        for reg in range(rCount):
1141            eWalkCode += '''
1142                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1143                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1144            ''' % { "reg" : reg }
1145            if readDest:
1146                eWalkCode += '''
1147                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1148                ''' % { "reg" : reg }
1149        readDestCode = ''
1150        if readDest:
1151            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1152        if pairwise:
1153            eWalkCode += '''
1154            for (unsigned i = 0; i < eCount; i++) {
1155                Element srcElem1 = gtoh(2 * i < eCount ?
1156                                        srcReg1.elements[2 * i] :
1157                                        srcReg2.elements[2 * i - eCount]);
1158                Element srcElem2 = gtoh(2 * i < eCount ?
1159                                        srcReg1.elements[2 * i + 1] :
1160                                        srcReg2.elements[2 * i + 1 - eCount]);
1161                Element destElem;
1162                %(readDest)s
1163                %(op)s
1164                destReg.elements[i] = htog(destElem);
1165            }
1166            ''' % { "op" : op, "readDest" : readDestCode }
1167        else:
1168            eWalkCode += '''
1169            for (unsigned i = 0; i < eCount; i++) {
1170                Element srcElem1 = gtoh(srcReg1.elements[i]);
1171                Element srcElem2 = gtoh(srcReg2.elements[i]);
1172                Element destElem;
1173                %(readDest)s
1174                %(op)s
1175                destReg.elements[i] = htog(destElem);
1176            }
1177            ''' % { "op" : op, "readDest" : readDestCode }
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1181            ''' % { "reg" : reg }
1182        iop = InstObjParams(name, Name,
1183                            "RegRegRegOp",
1184                            { "code": eWalkCode,
1185                              "r_count": rCount,
1186                              "predicate_test": predicateTest,
1187                              "op_class": opClass }, [])
1188        header_output += NeonRegRegRegOpDeclare.subst(iop)
1189        exec_output += NeonEqualRegExecute.subst(iop)
1190        for type in types:
1191            substDict = { "targs" : type,
1192                          "class_name" : Name }
1193            exec_output += NeonExecDeclare.subst(substDict)
1194
1195    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1196                            readDest=False, pairwise=False, toInt=False):
1197        global header_output, exec_output
1198        eWalkCode = simdEnabledCheckCode + '''
1199        typedef FloatReg FloatVect[rCount];
1200        FloatVect srcRegs1, srcRegs2;
1201        '''
1202        if toInt:
1203            eWalkCode += 'RegVect destRegs;\n'
1204        else:
1205            eWalkCode += 'FloatVect destRegs;\n'
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1209                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1210            ''' % { "reg" : reg }
1211            if readDest:
1212                if toInt:
1213                    eWalkCode += '''
1214                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1215                    ''' % { "reg" : reg }
1216                else:
1217                    eWalkCode += '''
1218                        destRegs[%(reg)d] = FpDestP%(reg)d;
1219                    ''' % { "reg" : reg }
1220        readDestCode = ''
1221        if readDest:
1222            readDestCode = 'destReg = destRegs[r];'
1223        destType = 'FloatReg'
1224        writeDest = 'destRegs[r] = destReg;'
1225        if toInt:
1226            destType = 'FloatRegBits'
1227            writeDest = 'destRegs.regs[r] = destReg;'
1228        if pairwise:
1229            eWalkCode += '''
1230            for (unsigned r = 0; r < rCount; r++) {
1231                FloatReg srcReg1 = (2 * r < rCount) ?
1232                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1233                FloatReg srcReg2 = (2 * r < rCount) ?
1234                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1235                %(destType)s destReg;
1236                %(readDest)s
1237                %(op)s
1238                %(writeDest)s
1239            }
1240            ''' % { "op" : op,
1241                    "readDest" : readDestCode,
1242                    "destType" : destType,
1243                    "writeDest" : writeDest }
1244        else:
1245            eWalkCode += '''
1246            for (unsigned r = 0; r < rCount; r++) {
1247                FloatReg srcReg1 = srcRegs1[r];
1248                FloatReg srcReg2 = srcRegs2[r];
1249                %(destType)s destReg;
1250                %(readDest)s
1251                %(op)s
1252                %(writeDest)s
1253            }
1254            ''' % { "op" : op,
1255                    "readDest" : readDestCode,
1256                    "destType" : destType,
1257                    "writeDest" : writeDest }
1258        for reg in range(rCount):
1259            if toInt:
1260                eWalkCode += '''
1261                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1262                ''' % { "reg" : reg }
1263            else:
1264                eWalkCode += '''
1265                FpDestP%(reg)d = destRegs[%(reg)d];
1266                ''' % { "reg" : reg }
1267        iop = InstObjParams(name, Name,
1268                            "FpRegRegRegOp",
1269                            { "code": eWalkCode,
1270                              "r_count": rCount,
1271                              "predicate_test": predicateTest,
1272                              "op_class": opClass }, [])
1273        header_output += NeonRegRegRegOpDeclare.subst(iop)
1274        exec_output += NeonEqualRegExecute.subst(iop)
1275        for type in types:
1276            substDict = { "targs" : type,
1277                          "class_name" : Name }
1278            exec_output += NeonExecDeclare.subst(substDict)
1279
1280    def threeUnequalRegInst(name, Name, opClass, types, op,
1281                            bigSrc1, bigSrc2, bigDest, readDest):
1282        global header_output, exec_output
1283        src1Cnt = src2Cnt = destCnt = 2
1284        src1Prefix = src2Prefix = destPrefix = ''
1285        if bigSrc1:
1286            src1Cnt = 4
1287            src1Prefix = 'Big'
1288        if bigSrc2:
1289            src2Cnt = 4
1290            src2Prefix = 'Big'
1291        if bigDest:
1292            destCnt = 4
1293            destPrefix = 'Big'
1294        eWalkCode = simdEnabledCheckCode + '''
1295            %sRegVect srcReg1;
1296            %sRegVect srcReg2;
1297            %sRegVect destReg;
1298        ''' % (src1Prefix, src2Prefix, destPrefix)
1299        for reg in range(src1Cnt):
1300            eWalkCode += '''
1301                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1302            ''' % { "reg" : reg }
1303        for reg in range(src2Cnt):
1304            eWalkCode += '''
1305                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1306            ''' % { "reg" : reg }
1307        if readDest:
1308            for reg in range(destCnt):
1309                eWalkCode += '''
1310                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1311                ''' % { "reg" : reg }
1312        readDestCode = ''
1313        if readDest:
1314            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1315        eWalkCode += '''
1316        for (unsigned i = 0; i < eCount; i++) {
1317            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1318            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1319            %(destPrefix)sElement destElem;
1320            %(readDest)s
1321            %(op)s
1322            destReg.elements[i] = htog(destElem);
1323        }
1324        ''' % { "op" : op, "readDest" : readDestCode,
1325                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1326                "destPrefix" : destPrefix }
1327        for reg in range(destCnt):
1328            eWalkCode += '''
1329            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1330            ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "RegRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": 2,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegRegOpDeclare.subst(iop)
1338        exec_output += NeonUnequalRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1345        threeUnequalRegInst(name, Name, opClass, types, op,
1346                            True, True, False, readDest)
1347
1348    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1349        threeUnequalRegInst(name, Name, opClass, types, op,
1350                            False, False, True, readDest)
1351
1352    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1353        threeUnequalRegInst(name, Name, opClass, types, op,
1354                            True, False, True, readDest)
1355
1356    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = simdEnabledCheckCode + '''
1359        RegVect srcReg1, srcReg2, destReg;
1360        '''
1361        for reg in range(rCount):
1362            eWalkCode += '''
1363                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1364                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1365            ''' % { "reg" : reg }
1366            if readDest:
1367                eWalkCode += '''
1368                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1369                ''' % { "reg" : reg }
1370        readDestCode = ''
1371        if readDest:
1372            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1373        eWalkCode += '''
1374        if (imm < 0 && imm >= eCount) {
1375            fault = new UndefinedInstruction(machInst, false, mnemonic);
1376        } else {
1377            for (unsigned i = 0; i < eCount; i++) {
1378                Element srcElem1 = gtoh(srcReg1.elements[i]);
1379                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1380                Element destElem;
1381                %(readDest)s
1382                %(op)s
1383                destReg.elements[i] = htog(destElem);
1384            }
1385        }
1386        ''' % { "op" : op, "readDest" : readDestCode }
1387        for reg in range(rCount):
1388            eWalkCode += '''
1389            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1390            ''' % { "reg" : reg }
1391        iop = InstObjParams(name, Name,
1392                            "RegRegRegImmOp",
1393                            { "code": eWalkCode,
1394                              "r_count": rCount,
1395                              "predicate_test": predicateTest,
1396                              "op_class": opClass }, [])
1397        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1398        exec_output += NeonEqualRegExecute.subst(iop)
1399        for type in types:
1400            substDict = { "targs" : type,
1401                          "class_name" : Name }
1402            exec_output += NeonExecDeclare.subst(substDict)
1403
1404    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1405        global header_output, exec_output
1406        rCount = 2
1407        eWalkCode = simdEnabledCheckCode + '''
1408        RegVect srcReg1, srcReg2;
1409        BigRegVect destReg;
1410        '''
1411        for reg in range(rCount):
1412            eWalkCode += '''
1413                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1414                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1415            ''' % { "reg" : reg }
1416        if readDest:
1417            for reg in range(2 * rCount):
1418                eWalkCode += '''
1419                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1420                ''' % { "reg" : reg }
1421        readDestCode = ''
1422        if readDest:
1423            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1424        eWalkCode += '''
1425        if (imm < 0 && imm >= eCount) {
1426            fault = new UndefinedInstruction(machInst, false, mnemonic);
1427        } else {
1428            for (unsigned i = 0; i < eCount; i++) {
1429                Element srcElem1 = gtoh(srcReg1.elements[i]);
1430                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1431                BigElement destElem;
1432                %(readDest)s
1433                %(op)s
1434                destReg.elements[i] = htog(destElem);
1435            }
1436        }
1437        ''' % { "op" : op, "readDest" : readDestCode }
1438        for reg in range(2 * rCount):
1439            eWalkCode += '''
1440            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1441            ''' % { "reg" : reg }
1442        iop = InstObjParams(name, Name,
1443                            "RegRegRegImmOp",
1444                            { "code": eWalkCode,
1445                              "r_count": rCount,
1446                              "predicate_test": predicateTest,
1447                              "op_class": opClass }, [])
1448        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1449        exec_output += NeonUnequalRegExecute.subst(iop)
1450        for type in types:
1451            substDict = { "targs" : type,
1452                          "class_name" : Name }
1453            exec_output += NeonExecDeclare.subst(substDict)
1454
1455    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1456        global header_output, exec_output
1457        eWalkCode = simdEnabledCheckCode + '''
1458        typedef FloatReg FloatVect[rCount];
1459        FloatVect srcRegs1, srcRegs2, destRegs;
1460        '''
1461        for reg in range(rCount):
1462            eWalkCode += '''
1463                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1464                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1465            ''' % { "reg" : reg }
1466            if readDest:
1467                eWalkCode += '''
1468                    destRegs[%(reg)d] = FpDestP%(reg)d;
1469                ''' % { "reg" : reg }
1470        readDestCode = ''
1471        if readDest:
1472            readDestCode = 'destReg = destRegs[i];'
1473        eWalkCode += '''
1474        if (imm < 0 && imm >= eCount) {
1475            fault = new UndefinedInstruction(machInst, false, mnemonic);
1476        } else {
1477            for (unsigned i = 0; i < rCount; i++) {
1478                FloatReg srcReg1 = srcRegs1[i];
1479                FloatReg srcReg2 = srcRegs2[imm];
1480                FloatReg destReg;
1481                %(readDest)s
1482                %(op)s
1483                destRegs[i] = destReg;
1484            }
1485        }
1486        ''' % { "op" : op, "readDest" : readDestCode }
1487        for reg in range(rCount):
1488            eWalkCode += '''
1489            FpDestP%(reg)d = destRegs[%(reg)d];
1490            ''' % { "reg" : reg }
1491        iop = InstObjParams(name, Name,
1492                            "FpRegRegRegImmOp",
1493                            { "code": eWalkCode,
1494                              "r_count": rCount,
1495                              "predicate_test": predicateTest,
1496                              "op_class": opClass }, [])
1497        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1498        exec_output += NeonEqualRegExecute.subst(iop)
1499        for type in types:
1500            substDict = { "targs" : type,
1501                          "class_name" : Name }
1502            exec_output += NeonExecDeclare.subst(substDict)
1503
1504    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1505            readDest=False, toInt=False, fromInt=False):
1506        global header_output, exec_output
1507        eWalkCode = simdEnabledCheckCode + '''
1508        RegVect srcRegs1, destRegs;
1509        '''
1510        for reg in range(rCount):
1511            eWalkCode += '''
1512                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1513            ''' % { "reg" : reg }
1514            if readDest:
1515                eWalkCode += '''
1516                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1517                ''' % { "reg" : reg }
1518        readDestCode = ''
1519        if readDest:
1520            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1521            if toInt:
1522                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1523        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1524        if fromInt:
1525            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1526        declDest = 'Element destElem;'
1527        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1528        if toInt:
1529            declDest = 'FloatRegBits destReg;'
1530            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1531        eWalkCode += '''
1532        for (unsigned i = 0; i < eCount; i++) {
1533            %(readOp)s
1534            %(declDest)s
1535            %(readDest)s
1536            %(op)s
1537            %(writeDest)s
1538        }
1539        ''' % { "readOp" : readOpCode,
1540                "declDest" : declDest,
1541                "readDest" : readDestCode,
1542                "op" : op,
1543                "writeDest" : writeDestCode }
1544        for reg in range(rCount):
1545            eWalkCode += '''
1546            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1547            ''' % { "reg" : reg }
1548        iop = InstObjParams(name, Name,
1549                            "RegRegImmOp",
1550                            { "code": eWalkCode,
1551                              "r_count": rCount,
1552                              "predicate_test": predicateTest,
1553                              "op_class": opClass }, [])
1554        header_output += NeonRegRegImmOpDeclare.subst(iop)
1555        exec_output += NeonEqualRegExecute.subst(iop)
1556        for type in types:
1557            substDict = { "targs" : type,
1558                          "class_name" : Name }
1559            exec_output += NeonExecDeclare.subst(substDict)
1560
1561    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1562        global header_output, exec_output
1563        eWalkCode = simdEnabledCheckCode + '''
1564        BigRegVect srcReg1;
1565        RegVect destReg;
1566        '''
1567        for reg in range(4):
1568            eWalkCode += '''
1569                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1570            ''' % { "reg" : reg }
1571        if readDest:
1572            for reg in range(2):
1573                eWalkCode += '''
1574                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1575                ''' % { "reg" : reg }
1576        readDestCode = ''
1577        if readDest:
1578            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1579        eWalkCode += '''
1580        for (unsigned i = 0; i < eCount; i++) {
1581            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1582            Element destElem;
1583            %(readDest)s
1584            %(op)s
1585            destReg.elements[i] = htog(destElem);
1586        }
1587        ''' % { "op" : op, "readDest" : readDestCode }
1588        for reg in range(2):
1589            eWalkCode += '''
1590            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1591            ''' % { "reg" : reg }
1592        iop = InstObjParams(name, Name,
1593                            "RegRegImmOp",
1594                            { "code": eWalkCode,
1595                              "r_count": 2,
1596                              "predicate_test": predicateTest,
1597                              "op_class": opClass }, [])
1598        header_output += NeonRegRegImmOpDeclare.subst(iop)
1599        exec_output += NeonUnequalRegExecute.subst(iop)
1600        for type in types:
1601            substDict = { "targs" : type,
1602                          "class_name" : Name }
1603            exec_output += NeonExecDeclare.subst(substDict)
1604
1605    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1606        global header_output, exec_output
1607        eWalkCode = simdEnabledCheckCode + '''
1608        RegVect srcReg1;
1609        BigRegVect destReg;
1610        '''
1611        for reg in range(2):
1612            eWalkCode += '''
1613                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1614            ''' % { "reg" : reg }
1615        if readDest:
1616            for reg in range(4):
1617                eWalkCode += '''
1618                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1619                ''' % { "reg" : reg }
1620        readDestCode = ''
1621        if readDest:
1622            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1623        eWalkCode += '''
1624        for (unsigned i = 0; i < eCount; i++) {
1625            Element srcElem1 = gtoh(srcReg1.elements[i]);
1626            BigElement destElem;
1627            %(readDest)s
1628            %(op)s
1629            destReg.elements[i] = htog(destElem);
1630        }
1631        ''' % { "op" : op, "readDest" : readDestCode }
1632        for reg in range(4):
1633            eWalkCode += '''
1634            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1635            ''' % { "reg" : reg }
1636        iop = InstObjParams(name, Name,
1637                            "RegRegImmOp",
1638                            { "code": eWalkCode,
1639                              "r_count": 2,
1640                              "predicate_test": predicateTest,
1641                              "op_class": opClass }, [])
1642        header_output += NeonRegRegImmOpDeclare.subst(iop)
1643        exec_output += NeonUnequalRegExecute.subst(iop)
1644        for type in types:
1645            substDict = { "targs" : type,
1646                          "class_name" : Name }
1647            exec_output += NeonExecDeclare.subst(substDict)
1648
1649    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1650        global header_output, exec_output
1651        eWalkCode = simdEnabledCheckCode + '''
1652        RegVect srcReg1, destReg;
1653        '''
1654        for reg in range(rCount):
1655            eWalkCode += '''
1656                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1657            ''' % { "reg" : reg }
1658            if readDest:
1659                eWalkCode += '''
1660                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1661                ''' % { "reg" : reg }
1662        readDestCode = ''
1663        if readDest:
1664            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1665        eWalkCode += '''
1666        for (unsigned i = 0; i < eCount; i++) {
1667            unsigned j = i;
1668            Element srcElem1 = gtoh(srcReg1.elements[i]);
1669            Element destElem;
1670            %(readDest)s
1671            %(op)s
1672            destReg.elements[j] = htog(destElem);
1673        }
1674        ''' % { "op" : op, "readDest" : readDestCode }
1675        for reg in range(rCount):
1676            eWalkCode += '''
1677            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1678            ''' % { "reg" : reg }
1679        iop = InstObjParams(name, Name,
1680                            "RegRegOp",
1681                            { "code": eWalkCode,
1682                              "r_count": rCount,
1683                              "predicate_test": predicateTest,
1684                              "op_class": opClass }, [])
1685        header_output += NeonRegRegOpDeclare.subst(iop)
1686        exec_output += NeonEqualRegExecute.subst(iop)
1687        for type in types:
1688            substDict = { "targs" : type,
1689                          "class_name" : Name }
1690            exec_output += NeonExecDeclare.subst(substDict)
1691
1692    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1693        global header_output, exec_output
1694        eWalkCode = simdEnabledCheckCode + '''
1695        RegVect srcReg1, destReg;
1696        '''
1697        for reg in range(rCount):
1698            eWalkCode += '''
1699                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1700            ''' % { "reg" : reg }
1701            if readDest:
1702                eWalkCode += '''
1703                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1704                ''' % { "reg" : reg }
1705        readDestCode = ''
1706        if readDest:
1707            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1708        eWalkCode += '''
1709        for (unsigned i = 0; i < eCount; i++) {
1710            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1711            Element destElem;
1712            %(readDest)s
1713            %(op)s
1714            destReg.elements[i] = htog(destElem);
1715        }
1716        ''' % { "op" : op, "readDest" : readDestCode }
1717        for reg in range(rCount):
1718            eWalkCode += '''
1719            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1720            ''' % { "reg" : reg }
1721        iop = InstObjParams(name, Name,
1722                            "RegRegImmOp",
1723                            { "code": eWalkCode,
1724                              "r_count": rCount,
1725                              "predicate_test": predicateTest,
1726                              "op_class": opClass }, [])
1727        header_output += NeonRegRegImmOpDeclare.subst(iop)
1728        exec_output += NeonEqualRegExecute.subst(iop)
1729        for type in types:
1730            substDict = { "targs" : type,
1731                          "class_name" : Name }
1732            exec_output += NeonExecDeclare.subst(substDict)
1733
1734    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1735        global header_output, exec_output
1736        eWalkCode = simdEnabledCheckCode + '''
1737        RegVect srcReg1, destReg;
1738        '''
1739        for reg in range(rCount):
1740            eWalkCode += '''
1741                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1742                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1743            ''' % { "reg" : reg }
1744            if readDest:
1745                eWalkCode += '''
1746                ''' % { "reg" : reg }
1747        readDestCode = ''
1748        if readDest:
1749            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1750        eWalkCode += op
1751        for reg in range(rCount):
1752            eWalkCode += '''
1753            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1754            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1755            ''' % { "reg" : reg }
1756        iop = InstObjParams(name, Name,
1757                            "RegRegOp",
1758                            { "code": eWalkCode,
1759                              "r_count": rCount,
1760                              "predicate_test": predicateTest,
1761                              "op_class": opClass }, [])
1762        header_output += NeonRegRegOpDeclare.subst(iop)
1763        exec_output += NeonEqualRegExecute.subst(iop)
1764        for type in types:
1765            substDict = { "targs" : type,
1766                          "class_name" : Name }
1767            exec_output += NeonExecDeclare.subst(substDict)
1768
1769    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1770            readDest=False, toInt=False):
1771        global header_output, exec_output
1772        eWalkCode = simdEnabledCheckCode + '''
1773        typedef FloatReg FloatVect[rCount];
1774        FloatVect srcRegs1;
1775        '''
1776        if toInt:
1777            eWalkCode += 'RegVect destRegs;\n'
1778        else:
1779            eWalkCode += 'FloatVect destRegs;\n'
1780        for reg in range(rCount):
1781            eWalkCode += '''
1782                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1783            ''' % { "reg" : reg }
1784            if readDest:
1785                if toInt:
1786                    eWalkCode += '''
1787                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1788                    ''' % { "reg" : reg }
1789                else:
1790                    eWalkCode += '''
1791                        destRegs[%(reg)d] = FpDestP%(reg)d;
1792                    ''' % { "reg" : reg }
1793        readDestCode = ''
1794        if readDest:
1795            readDestCode = 'destReg = destRegs[i];'
1796        destType = 'FloatReg'
1797        writeDest = 'destRegs[r] = destReg;'
1798        if toInt:
1799            destType = 'FloatRegBits'
1800            writeDest = 'destRegs.regs[r] = destReg;'
1801        eWalkCode += '''
1802        for (unsigned r = 0; r < rCount; r++) {
1803            FloatReg srcReg1 = srcRegs1[r];
1804            %(destType)s destReg;
1805            %(readDest)s
1806            %(op)s
1807            %(writeDest)s
1808        }
1809        ''' % { "op" : op,
1810                "readDest" : readDestCode,
1811                "destType" : destType,
1812                "writeDest" : writeDest }
1813        for reg in range(rCount):
1814            if toInt:
1815                eWalkCode += '''
1816                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1817                ''' % { "reg" : reg }
1818            else:
1819                eWalkCode += '''
1820                FpDestP%(reg)d = destRegs[%(reg)d];
1821                ''' % { "reg" : reg }
1822        iop = InstObjParams(name, Name,
1823                            "FpRegRegOp",
1824                            { "code": eWalkCode,
1825                              "r_count": rCount,
1826                              "predicate_test": predicateTest,
1827                              "op_class": opClass }, [])
1828        header_output += NeonRegRegOpDeclare.subst(iop)
1829        exec_output += NeonEqualRegExecute.subst(iop)
1830        for type in types:
1831            substDict = { "targs" : type,
1832                          "class_name" : Name }
1833            exec_output += NeonExecDeclare.subst(substDict)
1834
1835    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1836        global header_output, exec_output
1837        eWalkCode = simdEnabledCheckCode + '''
1838        RegVect srcRegs;
1839        BigRegVect destReg;
1840        '''
1841        for reg in range(rCount):
1842            eWalkCode += '''
1843                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1844            ''' % { "reg" : reg }
1845            if readDest:
1846                eWalkCode += '''
1847                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1848                ''' % { "reg" : reg }
1849        readDestCode = ''
1850        if readDest:
1851            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1852        eWalkCode += '''
1853        for (unsigned i = 0; i < eCount / 2; i++) {
1854            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1855            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1856            BigElement destElem;
1857            %(readDest)s
1858            %(op)s
1859            destReg.elements[i] = htog(destElem);
1860        }
1861        ''' % { "op" : op, "readDest" : readDestCode }
1862        for reg in range(rCount):
1863            eWalkCode += '''
1864            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1865            ''' % { "reg" : reg }
1866        iop = InstObjParams(name, Name,
1867                            "RegRegOp",
1868                            { "code": eWalkCode,
1869                              "r_count": rCount,
1870                              "predicate_test": predicateTest,
1871                              "op_class": opClass }, [])
1872        header_output += NeonRegRegOpDeclare.subst(iop)
1873        exec_output += NeonUnequalRegExecute.subst(iop)
1874        for type in types:
1875            substDict = { "targs" : type,
1876                          "class_name" : Name }
1877            exec_output += NeonExecDeclare.subst(substDict)
1878
1879    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1880        global header_output, exec_output
1881        eWalkCode = simdEnabledCheckCode + '''
1882        BigRegVect srcReg1;
1883        RegVect destReg;
1884        '''
1885        for reg in range(4):
1886            eWalkCode += '''
1887                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1888            ''' % { "reg" : reg }
1889        if readDest:
1890            for reg in range(2):
1891                eWalkCode += '''
1892                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1893                ''' % { "reg" : reg }
1894        readDestCode = ''
1895        if readDest:
1896            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1897        eWalkCode += '''
1898        for (unsigned i = 0; i < eCount; i++) {
1899            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1900            Element destElem;
1901            %(readDest)s
1902            %(op)s
1903            destReg.elements[i] = htog(destElem);
1904        }
1905        ''' % { "op" : op, "readDest" : readDestCode }
1906        for reg in range(2):
1907            eWalkCode += '''
1908            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1909            ''' % { "reg" : reg }
1910        iop = InstObjParams(name, Name,
1911                            "RegRegOp",
1912                            { "code": eWalkCode,
1913                              "r_count": 2,
1914                              "predicate_test": predicateTest,
1915                              "op_class": opClass }, [])
1916        header_output += NeonRegRegOpDeclare.subst(iop)
1917        exec_output += NeonUnequalRegExecute.subst(iop)
1918        for type in types:
1919            substDict = { "targs" : type,
1920                          "class_name" : Name }
1921            exec_output += NeonExecDeclare.subst(substDict)
1922
1923    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1924        global header_output, exec_output
1925        eWalkCode = simdEnabledCheckCode + '''
1926        RegVect destReg;
1927        '''
1928        if readDest:
1929            for reg in range(rCount):
1930                eWalkCode += '''
1931                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1932                ''' % { "reg" : reg }
1933        readDestCode = ''
1934        if readDest:
1935            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1936        eWalkCode += '''
1937        for (unsigned i = 0; i < eCount; i++) {
1938            Element destElem;
1939            %(readDest)s
1940            %(op)s
1941            destReg.elements[i] = htog(destElem);
1942        }
1943        ''' % { "op" : op, "readDest" : readDestCode }
1944        for reg in range(rCount):
1945            eWalkCode += '''
1946            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1947            ''' % { "reg" : reg }
1948        iop = InstObjParams(name, Name,
1949                            "RegImmOp",
1950                            { "code": eWalkCode,
1951                              "r_count": rCount,
1952                              "predicate_test": predicateTest,
1953                              "op_class": opClass }, [])
1954        header_output += NeonRegImmOpDeclare.subst(iop)
1955        exec_output += NeonEqualRegExecute.subst(iop)
1956        for type in types:
1957            substDict = { "targs" : type,
1958                          "class_name" : Name }
1959            exec_output += NeonExecDeclare.subst(substDict)
1960
1961    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1962        global header_output, exec_output
1963        eWalkCode = simdEnabledCheckCode + '''
1964        RegVect srcReg1;
1965        BigRegVect destReg;
1966        '''
1967        for reg in range(2):
1968            eWalkCode += '''
1969                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1970            ''' % { "reg" : reg }
1971        if readDest:
1972            for reg in range(4):
1973                eWalkCode += '''
1974                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1975                ''' % { "reg" : reg }
1976        readDestCode = ''
1977        if readDest:
1978            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1979        eWalkCode += '''
1980        for (unsigned i = 0; i < eCount; i++) {
1981            Element srcElem1 = gtoh(srcReg1.elements[i]);
1982            BigElement destElem;
1983            %(readDest)s
1984            %(op)s
1985            destReg.elements[i] = htog(destElem);
1986        }
1987        ''' % { "op" : op, "readDest" : readDestCode }
1988        for reg in range(4):
1989            eWalkCode += '''
1990            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1991            ''' % { "reg" : reg }
1992        iop = InstObjParams(name, Name,
1993                            "RegRegOp",
1994                            { "code": eWalkCode,
1995                              "r_count": 2,
1996                              "predicate_test": predicateTest,
1997                              "op_class": opClass }, [])
1998        header_output += NeonRegRegOpDeclare.subst(iop)
1999        exec_output += NeonUnequalRegExecute.subst(iop)
2000        for type in types:
2001            substDict = { "targs" : type,
2002                          "class_name" : Name }
2003            exec_output += NeonExecDeclare.subst(substDict)
2004
    # vhadd: per-element halving add. Each source has its low bit cleared
    # and is divided by two, the two halves are summed, and carryBit (the
    # carry out of adding the two low bits) is added back in.
    # NOTE(review): the 2 / 4 argument in the instantiations below is
    # presumably the register count of the D and Q forms; confirm against
    # threeEqualRegInst.
2005    vhaddCode = '''
2006        Element carryBit =
2007            (((unsigned)srcElem1 & 0x1) +
2008             ((unsigned)srcElem2 & 0x1)) >> 1;
2009        // Use division instead of a shift to ensure the sign extension works
2010        // right. The compiler will figure out if it can be a shift. Mask the
2011        // inputs so they get truncated correctly.
2012        destElem = (((srcElem1 & ~(Element)1) / 2) +
2013                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2014    '''
2015    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2016    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2017
    # vrhadd: same as vhadd except that an extra 1 is added to the low-bit
    # sum before it is shifted, so the result is rounded rather than
    # truncated.
2018    vrhaddCode = '''
2019        Element carryBit =
2020            (((unsigned)srcElem1 & 0x1) +
2021             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2022        // Use division instead of a shift to ensure the sign extension works
2023        // right. The compiler will figure out if it can be a shift. Mask the
2024        // inputs so they get truncated correctly.
2025        destElem = (((srcElem1 & ~(Element)1) / 2) +
2026                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2027    '''
2028    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2029    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2030
    # vhsub: per-element halving subtract. The halves of the two sources
    # (low bits cleared) are subtracted, then the borrow produced by
    # subtracting the low bits (named barrowBit in the snippet) is
    # subtracted as well.
2031    vhsubCode = '''
2032        Element barrowBit =
2033            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2034        // Use division instead of a shift to ensure the sign extension works
2035        // right. The compiler will figure out if it can be a shift. Mask the
2036        // inputs so they get truncated correctly.
2037        destElem = (((srcElem1 & ~(Element)1) / 2) -
2038                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2039    '''
2040    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2041    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2042
    # Bitwise operations. Each snippet computes one destination element
    # from srcElem1 and srcElem2 and is instantiated for the unsigned
    # element types only.

    # vand: bitwise AND.
2043    vandCode = '''
2044        destElem = srcElem1 & srcElem2;
2045    '''
2046    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2047    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2048
    # vbic: AND of srcElem1 with the complement of srcElem2.
2049    vbicCode = '''
2050        destElem = srcElem1 & ~srcElem2;
2051    '''
2052    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2053    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2054
    # vorr: bitwise OR.
2055    vorrCode = '''
2056        destElem = srcElem1 | srcElem2;
2057    '''
2058    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2059    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2060
    # vmov reuses the vorr snippet under a different mnemonic and op class.
    # NOTE(review): presumably the decoder selects these when both source
    # operands name the same register; confirm against the decoder.
2061    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2062    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2063
    # vorn: OR of srcElem1 with the complement of srcElem2.
2064    vornCode = '''
2065        destElem = srcElem1 | ~srcElem2;
2066    '''
2067    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2068    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2069
    # veor: bitwise exclusive OR.
2070    veorCode = '''
2071        destElem = srcElem1 ^ srcElem2;
2072    '''
2073    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2074    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2075
    # The three snippets below read destElem as an input.
    # NOTE(review): the trailing True is presumably threeEqualRegInst's
    # readDest flag, which would supply that value; confirm against its
    # definition.

    # vbif: keep destElem bits where srcElem2 is 1, take srcElem1 bits
    # where srcElem2 is 0.
2076    vbifCode = '''
2077        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2078    '''
2079    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2080    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # vbit: take srcElem1 bits where srcElem2 is 1, keep destElem bits
    # where srcElem2 is 0.
2081    vbitCode = '''
2082        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2083    '''
2084    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2085    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destElem acts as the selector; srcElem1 bits where it
    # is 1, srcElem2 bits where it is 0.
2086    vbslCode = '''
2087        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2088    '''
2089    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2090    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2091
    # vmax: per-element maximum; srcElem2 is chosen when the two compare
    # equal (the test is a strict >).
2092    vmaxCode = '''
2093        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2094    '''
2095    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2096    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2097
    # vmin: per-element minimum; srcElem2 is chosen when the two compare
    # equal (the test is a strict <).
2098    vminCode = '''
2099        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2100    '''
2101    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2102    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2103
    # vadd: plain per-element addition in the element type.
2104    vaddCode = '''
2105        destElem = srcElem1 + srcElem2;
2106    '''
2107    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2108    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2109
    # vpadd reuses the vadd snippet with pairwise=True.
    # NOTE(review): how operands are paired is decided inside
    # threeEqualRegInst, which is not visible here.
2110    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2111                      2, vaddCode, pairwise=True)
    # vaddl / vaddw: both operands are cast to BigElement before adding;
    # the same snippet serves the long and the wide generator.
2112    vaddlwCode = '''
2113        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2114    '''
2115    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2116    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # vaddhn: add as BigElement, then shift the sum right by the bit width
    # of Element.
2117    vaddhnCode = '''
2118        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2119                   (sizeof(Element) * 8);
2120    '''
2121    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # vraddhn: as vaddhn, but 1 << (bit width of Element - 1) is added
    # before the shift so the result is rounded.
2122    vraddhnCode = '''
2123        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2124                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2125                   (sizeof(Element) * 8);
2126    '''
2127    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2128
    # vsub: plain per-element subtraction in the element type.
2129    vsubCode = '''
2130        destElem = srcElem1 - srcElem2;
2131    '''
2132    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2133    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # vsubl / vsubw: both operands are cast to BigElement before
    # subtracting; shared by the long and the wide generator.
2134    vsublwCode = '''
2135        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2136    '''
2137    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2138    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2139
    # vqadd (unsigned): add, and if the sum is smaller than either input
    # (i.e. it wrapped) replace it with all ones and set fpscr.qc. The
    # FPSCR value is read from and written back to FpscrQc.
2140    vqaddUCode = '''
2141        destElem = srcElem1 + srcElem2;
2142        FPSCR fpscr = (FPSCR) FpscrQc;
2143        if (destElem < srcElem1 || destElem < srcElem2) {
2144            destElem = (Element)(-1);
2145            fpscr.qc = 1;
2146        }
2147        FpscrQc = fpscr;
2148    '''
2149    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2150    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # vsubhn: subtract as BigElement, then shift the difference right by
    # the bit width of Element.
2151    vsubhnCode = '''
2152        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2153                   (sizeof(Element) * 8);
2154    '''
2155    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # vrsubhn: as vsubhn, but 1 << (bit width of Element - 1) is added
    # before the shift so the result is rounded.
2156    vrsubhnCode = '''
2157        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2158                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2159                   (sizeof(Element) * 8);
2160    '''
2161    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2162
    # vqadd (signed): add, then detect overflow as "both inputs have the
    # same sign and the sum's sign differs". On overflow the result is
    # replaced by 1 << (bit width - 1), minus one when the wrapped sum was
    # negative, and fpscr.qc is set. The FPSCR value is read from and
    # written back to FpscrQc.
2163    vqaddSCode = '''
2164        destElem = srcElem1 + srcElem2;
2165        FPSCR fpscr = (FPSCR) FpscrQc;
2166        bool negDest = (destElem < 0);
2167        bool negSrc1 = (srcElem1 < 0);
2168        bool negSrc2 = (srcElem2 < 0);
2169        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2170            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2171            if (negDest)
2172                destElem -= 1;
2173            fpscr.qc = 1;
2174        }
2175        FpscrQc = fpscr;
2176    '''
2177    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2178    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2179
    # vqsub (unsigned): subtract, and if the difference is larger than
    # srcElem1 (i.e. it wrapped) replace it with 0 and set fpscr.qc.
2180    vqsubUCode = '''
2181        destElem = srcElem1 - srcElem2;
2182        FPSCR fpscr = (FPSCR) FpscrQc;
2183        if (destElem > srcElem1) {
2184            destElem = 0;
2185            fpscr.qc = 1;
2186        }
2187        FpscrQc = fpscr;
2188    '''
2189    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2190    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2191
    # vqsub (signed): subtract, then detect overflow as "srcElem1 negative
    # with srcElem2 non-negative (or the reverse) and the difference's
    # sign differs from srcElem1's". The replacement value is chosen the
    # same way as in the signed vqadd snippet, and fpscr.qc is set.
2192    vqsubSCode = '''
2193        destElem = srcElem1 - srcElem2;
2194        FPSCR fpscr = (FPSCR) FpscrQc;
2195        bool negDest = (destElem < 0);
2196        bool negSrc1 = (srcElem1 < 0);
2197        bool posSrc2 = (srcElem2 >= 0);
2198        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2199            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2200            if (negDest)
2201                destElem -= 1;
2202            fpscr.qc = 1;
2203        }
2204        FpscrQc = fpscr;
2205    '''
2206    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2207    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2208
    # Element-wise compares. Each writes (Element)(-1) when the condition
    # holds and 0 otherwise.

    # vcgt: srcElem1 > srcElem2.
2209    vcgtCode = '''
2210        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2211    '''
2212    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2213    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2214
    # vcge: srcElem1 >= srcElem2.
2215    vcgeCode = '''
2216        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2217    '''
2218    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2219    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2220
    # vceq: srcElem1 == srcElem2; instantiated for the unsigned types only.
2221    vceqCode = '''
2222        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2223    '''
2224    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2225    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2226
    # vshl: shift by a per-element register amount. The amount is
    # srcElem2 cast to int8_t. A negative amount means a right shift by
    # its magnitude; a magnitude of at least the element width yields 0
    # before the sign fix-up. The fix-up ORs in the high bits when
    # srcElem1 is negative (per ltz) but the shifted value is not. A
    # non-negative amount is a left shift, yielding 0 once the amount
    # reaches the element width.
2227    vshlCode = '''
2228        int16_t shiftAmt = (int8_t)srcElem2;
2229        if (shiftAmt < 0) {
2230            shiftAmt = -shiftAmt;
2231            if (shiftAmt >= sizeof(Element) * 8) {
2232                shiftAmt = sizeof(Element) * 8 - 1;
2233                destElem = 0;
2234            } else {
2235                destElem = (srcElem1 >> shiftAmt);
2236            }
2237            // Make sure the right shift sign extended when it should.
2238            if (ltz(srcElem1) && !ltz(destElem)) {
2239                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2240                                             1 - shiftAmt));
2241            }
2242        } else {
2243            if (shiftAmt >= sizeof(Element) * 8) {
2244                destElem = 0;
2245            } else {
2246                destElem = srcElem1 << shiftAmt;
2247            }
2248        }
2249    '''
2250    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2251    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2252
    # vrshl: as vshl, with rounding on right shifts. rBit is the last bit
    # shifted out (bit shiftAmt - 1 of srcElem1) when the magnitude is at
    # most the element width, and is forced to 1 when the magnitude
    # exceeds the width and srcElem1 is negative; it is added after the
    # shift and sign fix-up. A zero amount copies srcElem1 unchanged.
    # NOTE(review): these use op class "SimdAluOp" while vshl above uses
    # "SimdShiftOp"; confirm that the difference is intended.
2253    vrshlCode = '''
2254        int16_t shiftAmt = (int8_t)srcElem2;
2255        if (shiftAmt < 0) {
2256            shiftAmt = -shiftAmt;
2257            Element rBit = 0;
2258            if (shiftAmt <= sizeof(Element) * 8)
2259                rBit = bits(srcElem1, shiftAmt - 1);
2260            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2261                rBit = 1;
2262            if (shiftAmt >= sizeof(Element) * 8) {
2263                shiftAmt = sizeof(Element) * 8 - 1;
2264                destElem = 0;
2265            } else {
2266                destElem = (srcElem1 >> shiftAmt);
2267            }
2268            // Make sure the right shift sign extended when it should.
2269            if (ltz(srcElem1) && !ltz(destElem)) {
2270                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2271                                             1 - shiftAmt));
2272            }
2273            destElem += rBit;
2274        } else if (shiftAmt > 0) {
2275            if (shiftAmt >= sizeof(Element) * 8) {
2276                destElem = 0;
2277            } else {
2278                destElem = srcElem1 << shiftAmt;
2279            }
2280        } else {
2281            destElem = srcElem1;
2282        }
2283    '''
2284    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2285    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2286
    # vqshl (unsigned): saturating shift by a per-element register amount
    # (srcElem2 cast to int8_t). Negative amounts shift right, yielding 0
    # once the magnitude reaches the element width. Positive amounts shift
    # left; if any bit would be shifted out (or the amount reaches the
    # element width with a non-zero source) the result becomes
    # mask(element width) and fpscr.qc is set. A zero amount copies
    # srcElem1. The FPSCR value is read from and written back to FpscrQc.
2287    vqshlUCode = '''
2288        int16_t shiftAmt = (int8_t)srcElem2;
2289        FPSCR fpscr = (FPSCR) FpscrQc;
2290        if (shiftAmt < 0) {
2291            shiftAmt = -shiftAmt;
2292            if (shiftAmt >= sizeof(Element) * 8) {
2293                shiftAmt = sizeof(Element) * 8 - 1;
2294                destElem = 0;
2295            } else {
2296                destElem = (srcElem1 >> shiftAmt);
2297            }
2298        } else if (shiftAmt > 0) {
2299            if (shiftAmt >= sizeof(Element) * 8) {
2300                if (srcElem1 != 0) {
2301                    destElem = mask(sizeof(Element) * 8);
2302                    fpscr.qc = 1;
2303                } else {
2304                    destElem = 0;
2305                }
2306            } else {
2307                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2308                            sizeof(Element) * 8 - shiftAmt)) {
2309                    destElem = mask(sizeof(Element) * 8);
2310                    fpscr.qc = 1;
2311                } else {
2312                    destElem = srcElem1 << shiftAmt;
2313                }
2314            }
2315        } else {
2316            destElem = srcElem1;
2317        }
2318        FpscrQc = fpscr;
2319    '''
2320    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2321    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2322
    # vqshl (signed): as the unsigned form, with two differences. Right
    # shifts get the same sign fix-up as vshl. Left shifts saturate when
    # the top shiftAmt + 1 bits of srcElem1 are not all copies of the sign
    # (or when the amount reaches the element width with a non-zero
    # source). The saturated value is mask(element width - 1), bitwise
    # inverted when srcElem1 is negative, and fpscr.qc is set.
    # NOTE(review): these use op class "SimdCmpOp" while the unsigned
    # instantiations above use "SimdAluOp"; confirm that the difference is
    # intended.
2323    vqshlSCode = '''
2324        int16_t shiftAmt = (int8_t)srcElem2;
2325        FPSCR fpscr = (FPSCR) FpscrQc;
2326        if (shiftAmt < 0) {
2327            shiftAmt = -shiftAmt;
2328            if (shiftAmt >= sizeof(Element) * 8) {
2329                shiftAmt = sizeof(Element) * 8 - 1;
2330                destElem = 0;
2331            } else {
2332                destElem = (srcElem1 >> shiftAmt);
2333            }
2334            // Make sure the right shift sign extended when it should.
2335            if (srcElem1 < 0 && destElem >= 0) {
2336                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2337                                             1 - shiftAmt));
2338            }
2339        } else if (shiftAmt > 0) {
2340            bool sat = false;
2341            if (shiftAmt >= sizeof(Element) * 8) {
2342                if (srcElem1 != 0)
2343                    sat = true;
2344                else
2345                    destElem = 0;
2346            } else {
2347                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2348                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2349                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2350                    sat = true;
2351                } else {
2352                    destElem = srcElem1 << shiftAmt;
2353                }
2354            }
2355            if (sat) {
2356                fpscr.qc = 1;
2357                destElem = mask(sizeof(Element) * 8 - 1);
2358                if (srcElem1 < 0)
2359                    destElem = ~destElem;
2360            }
2361        } else {
2362            destElem = srcElem1;
2363        }
2364        FpscrQc = fpscr;
2365    '''
2366    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2367    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2368
2369    vqrshlUCode = '''
2370        int16_t shiftAmt = (int8_t)srcElem2;
2371        FPSCR fpscr = (FPSCR) FpscrQc;
2372        if (shiftAmt < 0) {
2373            shiftAmt = -shiftAmt;
2374            Element rBit = 0;
2375            if (shiftAmt <= sizeof(Element) * 8)
2376                rBit = bits(srcElem1, shiftAmt - 1);
2377            if (shiftAmt >= sizeof(Element) * 8) {
2378                shiftAmt = sizeof(Element) * 8 - 1;
2379                destElem = 0;
2380            } else {
2381                destElem = (srcElem1 >> shiftAmt);
2382            }
2383            destElem += rBit;
2384        } else {
2385            if (shiftAmt >= sizeof(Element) * 8) {
2386                if (srcElem1 != 0) {
2387                    destElem = mask(sizeof(Element) * 8);
2388                    fpscr.qc = 1;
2389                } else {
2390                    destElem = 0;
2391                }
2392            } else {
2393                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2394                            sizeof(Element) * 8 - shiftAmt)) {
2395                    destElem = mask(sizeof(Element) * 8);
2396                    fpscr.qc = 1;
2397                } else {
2398                    destElem = srcElem1 << shiftAmt;
2399                }
2400            }
2401        }
2402        FpscrQc = fpscr;
2403    '''
2404    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2405    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2406
2407    vqrshlSCode = '''
2408        int16_t shiftAmt = (int8_t)srcElem2;
2409        FPSCR fpscr = (FPSCR) FpscrQc;
2410        if (shiftAmt < 0) {
2411            shiftAmt = -shiftAmt;
2412            Element rBit = 0;
2413            if (shiftAmt <= sizeof(Element) * 8)
2414                rBit = bits(srcElem1, shiftAmt - 1);
2415            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2416                rBit = 1;
2417            if (shiftAmt >= sizeof(Element) * 8) {
2418                shiftAmt = sizeof(Element) * 8 - 1;
2419                destElem = 0;
2420            } else {
2421                destElem = (srcElem1 >> shiftAmt);
2422            }
2423            // Make sure the right shift sign extended when it should.
2424            if (srcElem1 < 0 && destElem >= 0) {
2425                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2426                                             1 - shiftAmt));
2427            }
2428            destElem += rBit;
2429        } else if (shiftAmt > 0) {
2430            bool sat = false;
2431            if (shiftAmt >= sizeof(Element) * 8) {
2432                if (srcElem1 != 0)
2433                    sat = true;
2434                else
2435                    destElem = 0;
2436            } else {
2437                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2438                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2439                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2440                    sat = true;
2441                } else {
2442                    destElem = srcElem1 << shiftAmt;
2443                }
2444            }
2445            if (sat) {
2446                fpscr.qc = 1;
2447                destElem = mask(sizeof(Element) * 8 - 1);
2448                if (srcElem1 < 0)
2449                    destElem = ~destElem;
2450            }
2451        } else {
2452            destElem = srcElem1;
2453        }
2454        FpscrQc = fpscr;
2455    '''
        # vqrshlSCode: signed rounding, saturating shift.  For right shifts
        # rBit is the last bit shifted out, forced to 1 for a negative source
        # when the amount exceeds the element width; it is added after the
        # shift and its sign-extension fix-up, so such over-wide shifts of a
        # negative value end up as -1 + 1 = 0.  Left shifts set fpscr.qc and
        # saturate to the largest positive value (complemented for a negative
        # source) when the top shiftAmt + 1 bits are not all sign copies.
2456    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2457    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2458
2459    vabaCode = '''
2460        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2461                                            (srcElem2 - srcElem1);
2462    '''
        # vabaCode adds the absolute difference of the two sources to
        # destElem.  The trailing True is passed by every accumulating form
        # in this section (presumably a "read the destination" flag --
        # confirm against threeEqualRegInst).
2463    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2464    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
2465    vabalCode = '''
2466        destElem += (srcElem1 > srcElem2) ?
2467            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2468            ((BigElement)srcElem2 - (BigElement)srcElem1);
2469    '''
        # vabalCode is the same accumulation with both sources cast to
        # BigElement before subtracting; registered through threeRegLongInst.
2470    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2471
2472    vabdCode = '''
2473        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2474                                           (srcElem2 - srcElem1);
2475    '''
        # vabdCode writes the absolute difference of the two sources (larger
        # minus smaller) to destElem without accumulating.
2476    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2477    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
2478    vabdlCode = '''
2479        destElem = (srcElem1 > srcElem2) ?
2480            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2481            ((BigElement)srcElem2 - (BigElement)srcElem1);
2482    '''
        # vabdlCode performs the same subtraction after casting both sources
        # to BigElement; registered through threeRegLongInst.
2483    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2484
2485    vtstCode = '''
2486        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2487    '''
        # vtstCode: destElem is all ones when the sources share at least one
        # set bit, otherwise 0.
2488    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2489    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2490
2491    vmulCode = '''
2492        destElem = srcElem1 * srcElem2;
2493    '''
        # vmulCode: element-wise product at the source element width.
2494    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2495    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2496    vmullCode = '''
2497        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2498    '''
        # vmullCode: product computed after casting both sources to
        # BigElement; registered through threeRegLongInst.
2499    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2500
2501    vmlaCode = '''
2502        destElem = destElem + srcElem1 * srcElem2;
2503    '''
        # vmlaCode: multiply the sources and add the product to destElem.
2504    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2505    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2506    vmlalCode = '''
2507        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2508    '''
        # vmlalCode: same accumulation with the product formed from
        # BigElement casts of the sources.
2509    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2510
2511    vqdmlalCode = '''
2512        FPSCR fpscr = (FPSCR) FpscrQc;
2513        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2514        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2515        Element halfNeg = maxNeg / 2;
2516        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2517            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2518            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2519            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2520            fpscr.qc = 1;
2521        }
2522        bool negPreDest = ltz(destElem);
2523        destElem += midElem;
2524        bool negDest = ltz(destElem);
2525        bool negMid = ltz(midElem);
2526        if (negPreDest == negMid && negMid != negDest) {
2527            destElem = mask(sizeof(BigElement) * 8 - 1);
2528            if (negPreDest)
2529                destElem = ~destElem;
2530            fpscr.qc = 1;
2531        }
2532        FpscrQc = fpscr;
2533    '''
2534    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2535
2536    vqdmlslCode = '''
2537        FPSCR fpscr = (FPSCR) FpscrQc;
2538        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2539        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2540        Element halfNeg = maxNeg / 2;
2541        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2542            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2543            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2544            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2545            fpscr.qc = 1;
2546        }
2547        bool negPreDest = ltz(destElem);
2548        destElem -= midElem;
2549        bool negDest = ltz(destElem);
2550        bool posMid = ltz((BigElement)-midElem);
2551        if (negPreDest == posMid && posMid != negDest) {
2552            destElem = mask(sizeof(BigElement) * 8 - 1);
2553            if (negPreDest)
2554                destElem = ~destElem;
2555            fpscr.qc = 1;
2556        }
2557        FpscrQc = fpscr;
2558    '''
2559    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2560
2561    vqdmullCode = '''
2562        FPSCR fpscr = (FPSCR) FpscrQc;
2563        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2564        if (srcElem1 == srcElem2 &&
2565                srcElem1 == (Element)((Element)1 <<
2566                    (Element)(sizeof(Element) * 8 - 1))) {
2567            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2568            fpscr.qc = 1;
2569        }
2570        FpscrQc = fpscr;
2571    '''
        # vqdmullCode: doubled product of the sources.  When both sources
        # equal the value with only the top Element bit set, destElem is
        # replaced by the complement of that value shifted up by the element
        # width, and fpscr.qc is set.
        # NOTE(review): registered with "SimdMultAccOp" here, while the
        # Vqdmulls registration later in this file passes "SimdMultOp" for the
        # same code string -- confirm which op class is intended.
2572    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2573
2574    vmlsCode = '''
2575        destElem = destElem - srcElem1 * srcElem2;
2576    '''
        # vmlsCode: multiply the sources and subtract the product from
        # destElem.
2577    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2578    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2579    vmlslCode = '''
2580        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2581    '''
        # vmlslCode: same subtraction with the product formed from BigElement
        # casts of the sources.
2582    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2583
2584    vmulpCode = '''
2585        destElem = 0;
2586        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2587            if (bits(srcElem2, j))
2588                destElem ^= srcElem1 << j;
2589        }
2590    '''
        # vmulpCode: for every set bit j of srcElem2, XOR srcElem1 << j into
        # destElem (a carry-less multiply).  Registered under the "vmul"
        # mnemonic with the NVmulp class names.
2591    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2592    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2593    vmullpCode = '''
2594        destElem = 0;
2595        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2596            if (bits(srcElem2, j))
2597                destElem ^= (BigElement)srcElem1 << j;
2598        }
2599    '''
        # vmullpCode: the same XOR accumulation with srcElem1 cast to
        # BigElement before shifting, so no partial product is truncated to
        # the source width.
2600    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2601
        # Pairwise max/min reuse vmaxCode and vminCode, which are defined
        # outside this part of the file; only the size-2 form is registered.
2602    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2603
2604    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2605
2606    vqdmulhCode = '''
2607        FPSCR fpscr = (FPSCR) FpscrQc;
2608        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2609                   (sizeof(Element) * 8);
2610        if (srcElem1 == srcElem2 &&
2611                srcElem1 == (Element)((Element)1 <<
2612                    (sizeof(Element) * 8 - 1))) {
2613            destElem = ~srcElem1;
2614            fpscr.qc = 1;
2615        }
2616        FpscrQc = fpscr;
2617    '''
        # vqdmulhCode: doubled product of the sources shifted right by the
        # element width.  When both sources equal the value with only the top
        # Element bit set, destElem is replaced by that value's complement and
        # fpscr.qc is set.
2618    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2619    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2620
2621    vqrdmulhCode = '''
2622        FPSCR fpscr = (FPSCR) FpscrQc;
2623        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2624                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2625                   (sizeof(Element) * 8);
2626        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2627        Element halfNeg = maxNeg / 2;
2628        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2629            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2630            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2631            if (destElem < 0) {
2632                destElem = mask(sizeof(Element) * 8 - 1);
2633            } else {
2634                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2635            }
2636            fpscr.qc = 1;
2637        }
2638        FpscrQc = fpscr;
2639    '''
2640    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2641            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2642    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2643            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2644
2645    vmaxfpCode = '''
2646        FPSCR fpscr = (FPSCR) FpscrExc;
2647        bool done;
2648        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2649        if (!done) {
2650            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2651                               true, true, VfpRoundNearest);
2652        } else if (flushToZero(srcReg1, srcReg2)) {
2653            fpscr.idc = 1;
2654        }
2655        FpscrExc = fpscr;
2656    '''
        # vmaxfpCode: processNans() runs first and reports through "done"
        # whether it already produced the result.  If not, the result comes
        # from binaryOp() with fpMax<float>; if so, fpscr.idc is set when
        # flushToZero() returns true for the sources.
2657    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2658    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2659
2660    vminfpCode = '''
2661        FPSCR fpscr = (FPSCR) FpscrExc;
2662        bool done;
2663        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2664        if (!done) {
2665            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2666                               true, true, VfpRoundNearest);
2667        } else if (flushToZero(srcReg1, srcReg2)) {
2668            fpscr.idc = 1;
2669        }
2670        FpscrExc = fpscr;
2671    '''
        # vminfpCode: identical in shape to the floating-point max code but
        # passes fpMin<float> to binaryOp().
2672    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2673    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2674
        # Pairwise floating-point max/min reuse the two code strings above
        # with pairwise=True.
2675    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2676                        2, vmaxfpCode, pairwise=True)
2677    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2678                        4, vmaxfpCode, pairwise=True)
2679
2680    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2681                        2, vminfpCode, pairwise=True)
2682    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2683                        4, vminfpCode, pairwise=True)
2684
2685    vaddfpCode = '''
2686        FPSCR fpscr = (FPSCR) FpscrExc;
2687        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2688                           true, true, VfpRoundNearest);
2689        FpscrExc = fpscr;
2690    '''
        # vaddfpCode: single binaryOp() call with fpAddS and VfpRoundNearest;
        # the local fpscr copy is written back to FpscrExc afterwards.
2691    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2692    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2693
        # Pairwise add reuses vaddfpCode with pairwise=True.
2694    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2695                        2, vaddfpCode, pairwise=True)
2696    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2697                        4, vaddfpCode, pairwise=True)
2698
2699    vsubfpCode = '''
2700        FPSCR fpscr = (FPSCR) FpscrExc;
2701        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2702                           true, true, VfpRoundNearest);
2703        FpscrExc = fpscr;
2704    '''
        # vsubfpCode: single binaryOp() call with fpSubS and VfpRoundNearest.
2705    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2706    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2707
2708    vmulfpCode = '''
2709        FPSCR fpscr = (FPSCR) FpscrExc;
2710        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2711                           true, true, VfpRoundNearest);
2712        FpscrExc = fpscr;
2713    '''
        # vmulfpCode: single binaryOp() call with fpMulS and VfpRoundNearest.
2714    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2715    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2716
2717    vmlafpCode = '''
2718        FPSCR fpscr = (FPSCR) FpscrExc;
2719        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2720                             true, true, VfpRoundNearest);
2721        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2722                           true, true, VfpRoundNearest);
2723        FpscrExc = fpscr;
2724    '''
        # vmlafpCode: two separate binaryOp() steps -- multiply the sources
        # (fpMulS) into "mid", then add mid to destReg (fpAddS).
2725    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2726    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2727
2728    vfmafpCode = '''
2729        FPSCR fpscr = (FPSCR) FpscrExc;
2730        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2731                            true, true, VfpRoundNearest);
2732        FpscrExc = fpscr;
2733    '''
        # vfmafpCode: one ternaryOp() call with fpMulAdd<float> over
        # srcReg1, srcReg2 and the current destReg.
2734    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2735    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2736
2737    vfmsfpCode = '''
2738        FPSCR fpscr = (FPSCR) FpscrExc;
2739        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2740                            true, true, VfpRoundNearest);
2741        FpscrExc = fpscr;
2742    '''
        # vfmsfpCode: same ternaryOp() call as the vfma code, but with
        # srcReg1 negated before it is passed in.
2743    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2744    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2745
2746    vmlsfpCode = '''
2747        FPSCR fpscr = (FPSCR) FpscrExc;
2748        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2749                             true, true, VfpRoundNearest);
2750        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2751                           true, true, VfpRoundNearest);
2752        FpscrExc = fpscr;
2753    '''
        # vmlsfpCode: two separate binaryOp() steps -- multiply the sources
        # (fpMulS) into "mid", then subtract mid from destReg (fpSubS).
2754    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2755    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2756
2757    vcgtfpCode = '''
2758        FPSCR fpscr = (FPSCR) FpscrExc;
2759        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2760                             true, true, VfpRoundNearest);
2761        destReg = (res == 0) ? -1 : 0;
2762        if (res == 2.0)
2763            fpscr.ioc = 1;
2764        FpscrExc = fpscr;
2765    '''
        # vcgtfpCode: vcgtFunc is evaluated through binaryOp().  A result of
        # 0 makes destReg all ones, anything else makes it 0, and a result of
        # 2.0 additionally sets fpscr.ioc (presumably vcgtFunc's encoding for
        # an invalid comparison -- confirm against its definition).
2766    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2767            2, vcgtfpCode, toInt = True)
2768    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2769            4, vcgtfpCode, toInt = True)
2770
2771    vcgefpCode = '''
2772        FPSCR fpscr = (FPSCR) FpscrExc;
2773        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2774                             true, true, VfpRoundNearest);
2775        destReg = (res == 0) ? -1 : 0;
2776        if (res == 2.0)
2777            fpscr.ioc = 1;
2778        FpscrExc = fpscr;
2779    '''
        # vcgefpCode: vcgeFunc is evaluated through binaryOp().  A result of
        # 0 makes destReg all ones, anything else makes it 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
2780    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2781            2, vcgefpCode, toInt = True)
2782    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2783            4, vcgefpCode, toInt = True)
2784
2785    vacgtfpCode = '''
2786        FPSCR fpscr = (FPSCR) FpscrExc;
2787        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2788                             true, true, VfpRoundNearest);
2789        destReg = (res == 0) ? -1 : 0;
2790        if (res == 2.0)
2791            fpscr.ioc = 1;
2792        FpscrExc = fpscr;
2793    '''
        # vacgtfpCode: vacgtFunc is evaluated through binaryOp().  A result
        # of 0 makes destReg all ones, anything else makes it 0, and a result
        # of 2.0 additionally sets fpscr.ioc.
2794    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2795            2, vacgtfpCode, toInt = True)
2796    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2797            4, vacgtfpCode, toInt = True)
2798
2799    vacgefpCode = '''
2800        FPSCR fpscr = (FPSCR) FpscrExc;
2801        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2802                             true, true, VfpRoundNearest);
2803        destReg = (res == 0) ? -1 : 0;
2804        if (res == 2.0)
2805            fpscr.ioc = 1;
2806        FpscrExc = fpscr;
2807    '''
        # vacgefpCode: vacgeFunc is evaluated through binaryOp().  A result
        # of 0 makes destReg all ones, anything else makes it 0, and a result
        # of 2.0 additionally sets fpscr.ioc.
2808    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2809            2, vacgefpCode, toInt = True)
2810    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2811            4, vacgefpCode, toInt = True)
2812
2813    vceqfpCode = '''
2814        FPSCR fpscr = (FPSCR) FpscrExc;
2815        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2816                             true, true, VfpRoundNearest);
2817        destReg = (res == 0) ? -1 : 0;
2818        if (res == 2.0)
2819            fpscr.ioc = 1;
2820        FpscrExc = fpscr;
2821    '''
        # vceqfpCode: vceqFunc is evaluated through binaryOp().  A result of
        # 0 makes destReg all ones, anything else makes it 0, and a result of
        # 2.0 additionally sets fpscr.ioc.
2822    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2823            2, vceqfpCode, toInt = True)
2824    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2825            4, vceqfpCode, toInt = True)
2826
2827    vrecpsCode = '''
2828        FPSCR fpscr = (FPSCR) FpscrExc;
2829        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2830                           true, true, VfpRoundNearest);
2831        FpscrExc = fpscr;
2832    '''
        # vrecpsCode: single binaryOp() call with fpRecpsS, which is defined
        # outside this file section.
2833    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2834    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2835
2836    vrsqrtsCode = '''
2837        FPSCR fpscr = (FPSCR) FpscrExc;
2838        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2839                           true, true, VfpRoundNearest);
2840        FpscrExc = fpscr;
2841    '''
        # vrsqrtsCode: single binaryOp() call with fpRSqrtsS, which is
        # defined outside this file section.
2842    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2843    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2844
2845    vabdfpCode = '''
2846        FPSCR fpscr = (FPSCR) FpscrExc;
2847        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2848                             true, true, VfpRoundNearest);
2849        destReg = fabs(mid);
2850        FpscrExc = fpscr;
2851    '''
        # vabdfpCode: subtract the sources through binaryOp() with fpSubS,
        # then take fabs() of the difference.
2852    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2853    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2854
        # The registrations below reuse code strings defined earlier through
        # the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst helpers
        # (presumably the by-scalar instruction forms, going by the trailing
        # "s" in the class names -- confirm against the helper definitions).
        # NOTE(review): the integer vmla forms are registered for
        # unsignedTypes while vmls and vmul use allTypes -- confirm.
2855    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2856    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2857    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2858    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2859    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2860
2861    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2862    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2863    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2864    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2865    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2866
2867    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2868    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2869    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2870    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2871    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2872
        # Saturating doubling multiplies, same helper families as above.
2873    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2874    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2875    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2876    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2877    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2878    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2879            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2880    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2881            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2882
2883    vshrCode = '''
2884        if (imm >= sizeof(srcElem1) * 8) {
2885            if (ltz(srcElem1))
2886                destElem = -1;
2887            else
2888                destElem = 0;
2889        } else {
2890            destElem = srcElem1 >> imm;
2891        }
2892    '''
        # vshrCode: shift right by the immediate.  When imm reaches the
        # element width the result is -1 if ltz(srcElem1) holds and 0
        # otherwise, instead of performing an over-wide shift.
2893    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2894    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2895
2896    vsraCode = '''
2897        Element mid;;
2898        if (imm >= sizeof(srcElem1) * 8) {
2899            mid = ltz(srcElem1) ? -1 : 0;
2900        } else {
2901            mid = srcElem1 >> imm;
2902            if (ltz(srcElem1) && !ltz(mid)) {
2903                mid |= -(mid & ((Element)1 <<
2904                            (sizeof(Element) * 8 - 1 - imm)));
2905            }
2906        }
2907        destElem += mid;
2908    '''
2909    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2910    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2911
2912    vrshrCode = '''
2913        if (imm > sizeof(srcElem1) * 8) {
2914            destElem = 0;
2915        } else if (imm) {
2916            Element rBit = bits(srcElem1, imm - 1);
2917            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2918        } else {
2919            destElem = srcElem1;
2920        }
2921    '''
        # vrshrCode: rounding shift right by the immediate.  rBit is the last
        # bit shifted out and is added to the shifted value.  The shift is
        # done in two steps (imm - 1, then 1) so that imm equal to the
        # element width never shifts by the full width at once.
2922    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2923    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2924
2925    vrsraCode = '''
2926        if (imm > sizeof(srcElem1) * 8) {
2927            destElem += 0;
2928        } else if (imm) {
2929            Element rBit = bits(srcElem1, imm - 1);
2930            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2931        } else {
2932            destElem += srcElem1;
2933        }
2934    '''
        # vrsraCode: accumulating form of the rounding shift right -- the
        # rounded, shifted source is added to destElem rather than assigned.
2935    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2936    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2937
2938    vsriCode = '''
2939        if (imm >= sizeof(Element) * 8)
2940            destElem = destElem;
2941        else
2942            destElem = (srcElem1 >> imm) |
2943                (destElem & ~mask(sizeof(Element) * 8 - imm));
2944    '''
        # vsriCode: shift srcElem1 right by imm and merge it into destElem,
        # keeping the top imm bits of the old destElem.  An immediate at or
        # beyond the element width leaves destElem unchanged.
2945    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2946    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2947
2948    vshlCode = '''
2949        if (imm >= sizeof(Element) * 8)
2950            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2951        else
2952            destElem = srcElem1 << imm;
2953    '''
        # vshlCode: shift left by the immediate.  An immediate at or beyond
        # the element width is performed as (width - 1) followed by 1 so no
        # single shift uses the full width.
2954    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2955    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2956
2957    vsliCode = '''
2958        if (imm >= sizeof(Element) * 8)
2959            destElem = destElem;
2960        else
2961            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2962    '''
        # vsliCode: shift srcElem1 left by imm and merge it into destElem,
        # keeping the low imm bits of the old destElem.  An immediate at or
        # beyond the element width leaves destElem unchanged.
2963    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2964    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2965
2966    vqshlCode = '''
2967        FPSCR fpscr = (FPSCR) FpscrQc;
2968        if (imm >= sizeof(Element) * 8) {
2969            if (srcElem1 != 0) {
2970                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2971                if (srcElem1 > 0)
2972                    destElem = ~destElem;
2973                fpscr.qc = 1;
2974            } else {
2975                destElem = 0;
2976            }
2977        } else if (imm) {
2978            destElem = (srcElem1 << imm);
2979            uint64_t topBits = bits((uint64_t)srcElem1,
2980                                    sizeof(Element) * 8 - 1,
2981                                    sizeof(Element) * 8 - 1 - imm);
2982            if (topBits != 0 && topBits != mask(imm + 1)) {
2983                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2984                if (srcElem1 > 0)
2985                    destElem = ~destElem;
2986                fpscr.qc = 1;
2987            }
2988        } else {
2989            destElem = srcElem1;
2990        }
2991        FpscrQc = fpscr;
2992    '''
        # vqshlCode: saturating shift left by the immediate, registered for
        # signedTypes.  The top imm + 1 bits of srcElem1 must be all zeros or
        # all ones; otherwise fpscr.qc is set and destElem becomes the value
        # with only the top bit set, complemented when srcElem1 is positive.
2993    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2994    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2995
2996    vqshluCode = '''
2997        FPSCR fpscr = (FPSCR) FpscrQc;
2998        if (imm >= sizeof(Element) * 8) {
2999            if (srcElem1 != 0) {
3000                destElem = mask(sizeof(Element) * 8);
3001                fpscr.qc = 1;
3002            } else {
3003                destElem = 0;
3004            }
3005        } else if (imm) {
3006            destElem = (srcElem1 << imm);
3007            uint64_t topBits = bits((uint64_t)srcElem1,
3008                                    sizeof(Element) * 8 - 1,
3009                                    sizeof(Element) * 8 - imm);
3010            if (topBits != 0) {
3011                destElem = mask(sizeof(Element) * 8);
3012                fpscr.qc = 1;
3013            }
3014        } else {
3015            destElem = srcElem1;
3016        }
3017        FpscrQc = fpscr;
3018    '''
        # vqshluCode: saturating shift left by the immediate, registered for
        # unsignedTypes.  If any of the top imm bits of srcElem1 is set the
        # result is the all-ones mask and fpscr.qc is set.
3019    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3020    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3021
3022    vqshlusCode = '''
3023        FPSCR fpscr = (FPSCR) FpscrQc;
3024        if (imm >= sizeof(Element) * 8) {
3025            if (srcElem1 < 0) {
3026                destElem = 0;
3027                fpscr.qc = 1;
3028            } else if (srcElem1 > 0) {
3029                destElem = mask(sizeof(Element) * 8);
3030                fpscr.qc = 1;
3031            } else {
3032                destElem = 0;
3033            }
3034        } else if (imm) {
3035            destElem = (srcElem1 << imm);
3036            uint64_t topBits = bits((uint64_t)srcElem1,
3037                                    sizeof(Element) * 8 - 1,
3038                                    sizeof(Element) * 8 - imm);
3039            if (srcElem1 < 0) {
3040                destElem = 0;
3041                fpscr.qc = 1;
3042            } else if (topBits != 0) {
3043                destElem = mask(sizeof(Element) * 8);
3044                fpscr.qc = 1;
3045            }
3046        } else {
3047            if (srcElem1 < 0) {
3048                fpscr.qc = 1;
3049                destElem = 0;
3050            } else {
3051                destElem = srcElem1;
3052            }
3053        }
3054        FpscrQc = fpscr;
3055    '''
        # vqshlusCode: saturating shift left by the immediate, registered for
        # signedTypes.  Any negative source gives 0 with fpscr.qc set, even
        # for a zero shift.  A non-negative source saturates to the all-ones
        # mask, with fpscr.qc set, when set bits would be shifted out.
3056    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3057    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3058
3059    vshrnCode = '''
3060        if (imm >= sizeof(srcElem1) * 8) {
3061            destElem = 0;
3062        } else {
3063            destElem = srcElem1 >> imm;
3064        }
3065    '''
3066    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3067
3068    vrshrnCode = '''
3069        if (imm > sizeof(srcElem1) * 8) {
3070            destElem = 0;
3071        } else if (imm) {
3072            Element rBit = bits(srcElem1, imm - 1);
3073            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3074        } else {
3075            destElem = srcElem1;
3076        }
3077    '''
3078    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3079
3080    vqshrnCode = '''
3081        FPSCR fpscr = (FPSCR) FpscrQc;
3082        if (imm > sizeof(srcElem1) * 8) {
3083            if (srcElem1 != 0 && srcElem1 != -1)
3084                fpscr.qc = 1;
3085            destElem = 0;
3086        } else if (imm) {
3087            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3088            mid |= -(mid & ((BigElement)1 <<
3089                        (sizeof(BigElement) * 8 - 1 - imm)));
3090            if (mid != (Element)mid) {
3091                destElem = mask(sizeof(Element) * 8 - 1);
3092                if (srcElem1 < 0)
3093                    destElem = ~destElem;
3094                fpscr.qc = 1;
3095            } else {
3096                destElem = mid;
3097            }
3098        } else {
3099            destElem = srcElem1;
3100        }
3101        FpscrQc = fpscr;
3102    '''
3103    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3104
3105    vqshrunCode = '''
3106        FPSCR fpscr = (FPSCR) FpscrQc;
3107        if (imm > sizeof(srcElem1) * 8) {
3108            if (srcElem1 != 0)
3109                fpscr.qc = 1;
3110            destElem = 0;
3111        } else if (imm) {
3112            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3113            if (mid != (Element)mid) {
3114                destElem = mask(sizeof(Element) * 8);
3115                fpscr.qc = 1;
3116            } else {
3117                destElem = mid;
3118            }
3119        } else {
3120            destElem = srcElem1;
3121        }
3122        FpscrQc = fpscr;
3123    '''
3124    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3125                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3126
3127    vqshrunsCode = '''
3128        FPSCR fpscr = (FPSCR) FpscrQc;
3129        if (imm > sizeof(srcElem1) * 8) {
3130            if (srcElem1 != 0)
3131                fpscr.qc = 1;
3132            destElem = 0;
3133        } else if (imm) {
3134            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3135            if (bits(mid, sizeof(BigElement) * 8 - 1,
3136                          sizeof(Element) * 8) != 0) {
3137                if (srcElem1 < 0) {
3138                    destElem = 0;
3139                } else {
3140                    destElem = mask(sizeof(Element) * 8);
3141                }
3142                fpscr.qc = 1;
3143            } else {
3144                destElem = mid;
3145            }
3146        } else {
3147            destElem = srcElem1;
3148        }
3149        FpscrQc = fpscr;
3150    '''
3151    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3152                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3153
3154    vqrshrnCode = '''
3155        FPSCR fpscr = (FPSCR) FpscrQc;
3156        if (imm > sizeof(srcElem1) * 8) {
3157            if (srcElem1 != 0 && srcElem1 != -1)
3158                fpscr.qc = 1;
3159            destElem = 0;
3160        } else if (imm) {
3161            BigElement mid = (srcElem1 >> (imm - 1));
3162            uint64_t rBit = mid & 0x1;
3163            mid >>= 1;
3164            mid |= -(mid & ((BigElement)1 <<
3165                        (sizeof(BigElement) * 8 - 1 - imm)));
3166            mid += rBit;
3167            if (mid != (Element)mid) {
3168                destElem = mask(sizeof(Element) * 8 - 1);
3169                if (srcElem1 < 0)
3170                    destElem = ~destElem;
3171                fpscr.qc = 1;
3172            } else {
3173                destElem = mid;
3174            }
3175        } else {
3176            if (srcElem1 != (Element)srcElem1) {
3177                destElem = mask(sizeof(Element) * 8 - 1);
3178                if (srcElem1 < 0)
3179                    destElem = ~destElem;
3180                fpscr.qc = 1;
3181            } else {
3182                destElem = srcElem1;
3183            }
3184        }
3185        FpscrQc = fpscr;
3186    '''
3187    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3188                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3189
3190    vqrshrunCode = '''
3191        FPSCR fpscr = (FPSCR) FpscrQc;
3192        if (imm > sizeof(srcElem1) * 8) {
3193            if (srcElem1 != 0)
3194                fpscr.qc = 1;
3195            destElem = 0;
3196        } else if (imm) {
3197            BigElement mid = (srcElem1 >> (imm - 1));
3198            uint64_t rBit = mid & 0x1;
3199            mid >>= 1;
3200            mid += rBit;
3201            if (mid != (Element)mid) {
3202                destElem = mask(sizeof(Element) * 8);
3203                fpscr.qc = 1;
3204            } else {
3205                destElem = mid;
3206            }
3207        } else {
3208            if (srcElem1 != (Element)srcElem1) {
3209                destElem = mask(sizeof(Element) * 8 - 1);
3210                fpscr.qc = 1;
3211            } else {
3212                destElem = srcElem1;
3213            }
3214        }
3215        FpscrQc = fpscr;
3216    '''
3217    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3218                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3219
3220    vqrshrunsCode = '''
3221        FPSCR fpscr = (FPSCR) FpscrQc;
3222        if (imm > sizeof(srcElem1) * 8) {
3223            if (srcElem1 != 0)
3224                fpscr.qc = 1;
3225            destElem = 0;
3226        } else if (imm) {
3227            BigElement mid = (srcElem1 >> (imm - 1));
3228            uint64_t rBit = mid & 0x1;
3229            mid >>= 1;
3230            mid |= -(mid & ((BigElement)1 <<
3231                            (sizeof(BigElement) * 8 - 1 - imm)));
3232            mid += rBit;
3233            if (bits(mid, sizeof(BigElement) * 8 - 1,
3234                          sizeof(Element) * 8) != 0) {
3235                if (srcElem1 < 0) {
3236                    destElem = 0;
3237                } else {
3238                    destElem = mask(sizeof(Element) * 8);
3239                }
3240                fpscr.qc = 1;
3241            } else {
3242                destElem = mid;
3243            }
3244        } else {
3245            if (srcElem1 < 0) {
3246                fpscr.qc = 1;
3247                destElem = 0;
3248            } else {
3249                destElem = srcElem1;
3250            }
3251        }
3252        FpscrQc = fpscr;
3253    '''
3254    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3255                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3256
3257    vshllCode = '''
3258        if (imm >= sizeof(destElem) * 8) {
3259            destElem = 0;
3260        } else {
3261            destElem = (BigElement)srcElem1 << imm;
3262        }
3263    '''
3264    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3265
3266    vmovlCode = '''
3267        destElem = srcElem1;
3268    '''
3269    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3270
3271    vcvt2ufxCode = '''
3272        FPSCR fpscr = (FPSCR) FpscrExc;
3273        if (flushToZero(srcElem1))
3274            fpscr.idc = 1;
3275        VfpSavedState state = prepFpState(VfpRoundNearest);
3276        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3277        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3278        __asm__ __volatile__("" :: "m" (destReg));
3279        finishVfp(fpscr, state, true);
3280        FpscrExc = fpscr;
3281    '''
3282    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3283            2, vcvt2ufxCode, toInt = True)
3284    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3285            4, vcvt2ufxCode, toInt = True)
3286
3287    vcvt2sfxCode = '''
3288        FPSCR fpscr = (FPSCR) FpscrExc;
3289        if (flushToZero(srcElem1))
3290            fpscr.idc = 1;
3291        VfpSavedState state = prepFpState(VfpRoundNearest);
3292        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3293        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3294        __asm__ __volatile__("" :: "m" (destReg));
3295        finishVfp(fpscr, state, true);
3296        FpscrExc = fpscr;
3297    '''
3298    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3299            2, vcvt2sfxCode, toInt = True)
3300    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3301            4, vcvt2sfxCode, toInt = True)
3302
3303    vcvtu2fpCode = '''
3304        FPSCR fpscr = (FPSCR) FpscrExc;
3305        VfpSavedState state = prepFpState(VfpRoundNearest);
3306        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3307        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3308        __asm__ __volatile__("" :: "m" (destElem));
3309        finishVfp(fpscr, state, true);
3310        FpscrExc = fpscr;
3311    '''
3312    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3313            2, vcvtu2fpCode, fromInt = True)
3314    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3315            4, vcvtu2fpCode, fromInt = True)
3316
3317    vcvts2fpCode = '''
3318        FPSCR fpscr = (FPSCR) FpscrExc;
3319        VfpSavedState state = prepFpState(VfpRoundNearest);
3320        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3321        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3322        __asm__ __volatile__("" :: "m" (destElem));
3323        finishVfp(fpscr, state, true);
3324        FpscrExc = fpscr;
3325    '''
3326    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3327            2, vcvts2fpCode, fromInt = True)
3328    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3329            4, vcvts2fpCode, fromInt = True)
3330
3331    vcvts2hCode = '''
3332        destElem = 0;
3333        FPSCR fpscr = (FPSCR) FpscrExc;
3334        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3335        if (flushToZero(srcFp1))
3336            fpscr.idc = 1;
3337        VfpSavedState state = prepFpState(VfpRoundNearest);
3338        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3339                                : "m" (srcFp1), "m" (destElem));
3340        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3341                              fpscr.ahp, srcFp1);
3342        __asm__ __volatile__("" :: "m" (destElem));
3343        finishVfp(fpscr, state, true);
3344        FpscrExc = fpscr;
3345    '''
3346    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3347
3348    vcvth2sCode = '''
3349        destElem = 0;
3350        FPSCR fpscr = (FPSCR) FpscrExc;
3351        VfpSavedState state = prepFpState(VfpRoundNearest);
3352        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3353                                : "m" (srcElem1), "m" (destElem));
3354        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3355        __asm__ __volatile__("" :: "m" (destElem));
3356        finishVfp(fpscr, state, true);
3357        FpscrExc = fpscr;
3358    '''
3359    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3360
3361    vrsqrteCode = '''
3362        destElem = unsignedRSqrtEstimate(srcElem1);
3363    '''
3364    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3365    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3366
3367    vrsqrtefpCode = '''
3368        FPSCR fpscr = (FPSCR) FpscrExc;
3369        if (flushToZero(srcReg1))
3370            fpscr.idc = 1;
3371        destReg = fprSqrtEstimate(fpscr, srcReg1);
3372        FpscrExc = fpscr;
3373    '''
3374    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3375    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3376
3377    vrecpeCode = '''
3378        destElem = unsignedRecipEstimate(srcElem1);
3379    '''
3380    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3381    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3382
3383    vrecpefpCode = '''
3384        FPSCR fpscr = (FPSCR) FpscrExc;
3385        if (flushToZero(srcReg1))
3386            fpscr.idc = 1;
3387        destReg = fpRecipEstimate(fpscr, srcReg1);
3388        FpscrExc = fpscr;
3389    '''
3390    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3391    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3392
3393    vrev16Code = '''
3394        destElem = srcElem1;
3395        unsigned groupSize = ((1 << 1) / sizeof(Element));
3396        unsigned reverseMask = (groupSize - 1);
3397        j = i ^ reverseMask;
3398    '''
3399    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3400    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3401    vrev32Code = '''
3402        destElem = srcElem1;
3403        unsigned groupSize = ((1 << 2) / sizeof(Element));
3404        unsigned reverseMask = (groupSize - 1);
3405        j = i ^ reverseMask;
3406    '''
3407    twoRegMiscInst("vrev32", "NVrev32D",
3408            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3409    twoRegMiscInst("vrev32", "NVrev32Q",
3410            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3411    vrev64Code = '''
3412        destElem = srcElem1;
3413        unsigned groupSize = ((1 << 3) / sizeof(Element));
3414        unsigned reverseMask = (groupSize - 1);
3415        j = i ^ reverseMask;
3416    '''
3417    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3418    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3419
    # NOTE(review): split() is defined outside this section; presumably it
    # starts a new piece of the generated 'exec' output -- confirm against
    # the ISA parser before relying on this.
    split('exec')
    # Append vcompares and vcomparesL (both built outside this section) to
    # the exec output.
    exec_output += vcompares + vcomparesL
3422
3423    vpaddlCode = '''
3424        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3425    '''
3426    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3427    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3428
3429    vpadalCode = '''
3430        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3431    '''
3432    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3433    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3434
3435    vclsCode = '''
3436        unsigned count = 0;
3437        if (srcElem1 < 0) {
3438            srcElem1 <<= 1;
3439            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3440                count++;
3441                srcElem1 <<= 1;
3442            }
3443        } else {
3444            srcElem1 <<= 1;
3445            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3446                count++;
3447                srcElem1 <<= 1;
3448            }
3449        }
3450        destElem = count;
3451    '''
3452    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3453    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3454
3455    vclzCode = '''
3456        unsigned count = 0;
3457        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3458            count++;
3459            srcElem1 <<= 1;
3460        }
3461        destElem = count;
3462    '''
3463    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3464    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3465
3466    vcntCode = '''
3467        unsigned count = 0;
3468        while (srcElem1 && count < sizeof(Element) * 8) {
3469            count += srcElem1 & 0x1;
3470            srcElem1 >>= 1;
3471        }
3472        destElem = count;
3473    '''
3474
3475    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3476    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3477
3478    vmvnCode = '''
3479        destElem = ~srcElem1;
3480    '''
3481    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3482    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3483
3484    vqabsCode = '''
3485        FPSCR fpscr = (FPSCR) FpscrQc;
3486        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3487            fpscr.qc = 1;
3488            destElem = ~srcElem1;
3489        } else if (srcElem1 < 0) {
3490            destElem = -srcElem1;
3491        } else {
3492            destElem = srcElem1;
3493        }
3494        FpscrQc = fpscr;
3495    '''
3496    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3497    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3498
3499    vqnegCode = '''
3500        FPSCR fpscr = (FPSCR) FpscrQc;
3501        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3502            fpscr.qc = 1;
3503            destElem = ~srcElem1;
3504        } else {
3505            destElem = -srcElem1;
3506        }
3507        FpscrQc = fpscr;
3508    '''
3509    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3510    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3511
3512    vabsCode = '''
3513        if (srcElem1 < 0) {
3514            destElem = -srcElem1;
3515        } else {
3516            destElem = srcElem1;
3517        }
3518    '''
3519
3520    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3521    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3522    vabsfpCode = '''
3523        union
3524        {
3525            uint32_t i;
3526            float f;
3527        } cStruct;
3528        cStruct.f = srcReg1;
3529        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3530        destReg = cStruct.f;
3531    '''
3532    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3533    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3534
3535    vnegCode = '''
3536        destElem = -srcElem1;
3537    '''
3538    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3539    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3540    vnegfpCode = '''
3541        destReg = -srcReg1;
3542    '''
3543    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3544    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3545
3546    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3547    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3548    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3549    vcgtfpCode = '''
3550        FPSCR fpscr = (FPSCR) FpscrExc;
3551        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3552                             true, true, VfpRoundNearest);
3553        destReg = (res == 0) ? -1 : 0;
3554        if (res == 2.0)
3555            fpscr.ioc = 1;
3556        FpscrExc = fpscr;
3557    '''
3558    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3559            2, vcgtfpCode, toInt = True)
3560    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3561            4, vcgtfpCode, toInt = True)
3562
3563    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3564    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3565    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3566    vcgefpCode = '''
3567        FPSCR fpscr = (FPSCR) FpscrExc;
3568        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3569                             true, true, VfpRoundNearest);
3570        destReg = (res == 0) ? -1 : 0;
3571        if (res == 2.0)
3572            fpscr.ioc = 1;
3573        FpscrExc = fpscr;
3574    '''
3575    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3576            2, vcgefpCode, toInt = True)
3577    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3578            4, vcgefpCode, toInt = True)
3579
3580    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3581    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3582    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3583    vceqfpCode = '''
3584        FPSCR fpscr = (FPSCR) FpscrExc;
3585        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3586                             true, true, VfpRoundNearest);
3587        destReg = (res == 0) ? -1 : 0;
3588        if (res == 2.0)
3589            fpscr.ioc = 1;
3590        FpscrExc = fpscr;
3591    '''
3592    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3593            2, vceqfpCode, toInt = True)
3594    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3595            4, vceqfpCode, toInt = True)
3596
3597    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3598    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3599    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3600    vclefpCode = '''
3601        FPSCR fpscr = (FPSCR) FpscrExc;
3602        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3603                             true, true, VfpRoundNearest);
3604        destReg = (res == 0) ? -1 : 0;
3605        if (res == 2.0)
3606            fpscr.ioc = 1;
3607        FpscrExc = fpscr;
3608    '''
3609    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3610            2, vclefpCode, toInt = True)
3611    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3612            4, vclefpCode, toInt = True)
3613
3614    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3615    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3616    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3617    vcltfpCode = '''
3618        FPSCR fpscr = (FPSCR) FpscrExc;
3619        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3620                             true, true, VfpRoundNearest);
3621        destReg = (res == 0) ? -1 : 0;
3622        if (res == 2.0)
3623            fpscr.ioc = 1;
3624        FpscrExc = fpscr;
3625    '''
3626    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3627            2, vcltfpCode, toInt = True)
3628    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3629            4, vcltfpCode, toInt = True)
3630
3631    vswpCode = '''
3632        FloatRegBits mid;
3633        for (unsigned r = 0; r < rCount; r++) {
3634            mid = srcReg1.regs[r];
3635            srcReg1.regs[r] = destReg.regs[r];
3636            destReg.regs[r] = mid;
3637        }
3638    '''
3639    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3640    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3641
3642    vtrnCode = '''
3643        Element mid;
3644        for (unsigned i = 0; i < eCount; i += 2) {
3645            mid = srcReg1.elements[i];
3646            srcReg1.elements[i] = destReg.elements[i + 1];
3647            destReg.elements[i + 1] = mid;
3648        }
3649    '''
3650    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3651            smallUnsignedTypes, 2, vtrnCode)
3652    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3653            smallUnsignedTypes, 4, vtrnCode)
3654
3655    vuzpCode = '''
3656        Element mid[eCount];
3657        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3658        for (unsigned i = 0; i < eCount / 2; i++) {
3659            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3660            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3661            destReg.elements[i] = destReg.elements[2 * i];
3662        }
3663        for (unsigned i = 0; i < eCount / 2; i++) {
3664            destReg.elements[eCount / 2 + i] = mid[2 * i];
3665        }
3666    '''
3667    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3668    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3669
3670    vzipCode = '''
3671        Element mid[eCount];
3672        memcpy(&mid, &destReg, sizeof(destReg));
3673        for (unsigned i = 0; i < eCount / 2; i++) {
3674            destReg.elements[2 * i] = mid[i];
3675            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3676        }
3677        for (int i = 0; i < eCount / 2; i++) {
3678            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3679            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3680        }
3681    '''
3682    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3683    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3684
3685    vmovnCode = 'destElem = srcElem1;'
3686    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3687
3688    vdupCode = 'destElem = srcElem1;'
3689    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3690    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3691
3692    def vdupGprInst(name, Name, opClass, types, rCount):
3693        global header_output, exec_output
3694        eWalkCode = '''
3695        RegVect destReg;
3696        for (unsigned i = 0; i < eCount; i++) {
3697            destReg.elements[i] = htog((Element)Op1);
3698        }
3699        '''
3700        for reg in range(rCount):
3701            eWalkCode += '''
3702            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3703            ''' % { "reg" : reg }
3704        iop = InstObjParams(name, Name,
3705                            "RegRegOp",
3706                            { "code": eWalkCode,
3707                              "r_count": rCount,
3708                              "predicate_test": predicateTest,
3709                              "op_class": opClass }, [])
3710        header_output += NeonRegRegOpDeclare.subst(iop)
3711        exec_output += NeonEqualRegExecute.subst(iop)
3712        for type in types:
3713            substDict = { "targs" : type,
3714                          "class_name" : Name }
3715            exec_output += NeonExecDeclare.subst(substDict)
3716    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3717    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3718
3719    vmovCode = 'destElem = imm;'
3720    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3721    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3722
3723    vorrCode = 'destElem |= imm;'
3724    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3725    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3726
3727    vmvnCode = 'destElem = ~imm;'
3728    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3729    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3730
3731    vbicCode = 'destElem &= ~imm;'
3732    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3733    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3734
3735    vqmovnCode = '''
3736    FPSCR fpscr = (FPSCR) FpscrQc;
3737    destElem = srcElem1;
3738    if ((BigElement)destElem != srcElem1) {
3739        fpscr.qc = 1;
3740        destElem = mask(sizeof(Element) * 8 - 1);
3741        if (srcElem1 < 0)
3742            destElem = ~destElem;
3743    }
3744    FpscrQc = fpscr;
3745    '''
3746    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3747
3748    vqmovunCode = '''
3749    FPSCR fpscr = (FPSCR) FpscrQc;
3750    destElem = srcElem1;
3751    if ((BigElement)destElem != srcElem1) {
3752        fpscr.qc = 1;
3753        destElem = mask(sizeof(Element) * 8);
3754    }
3755    FpscrQc = fpscr;
3756    '''
3757    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3758            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3759
3760    vqmovunsCode = '''
3761    FPSCR fpscr = (FPSCR) FpscrQc;
3762    destElem = srcElem1;
3763    if (srcElem1 < 0 ||
3764            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3765        fpscr.qc = 1;
3766        destElem = mask(sizeof(Element) * 8);
3767        if (srcElem1 < 0)
3768            destElem = ~destElem;
3769    }
3770    FpscrQc = fpscr;
3771    '''
3772    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3773            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3774
3775    def buildVext(name, Name, opClass, types, rCount, op):
3776        global header_output, exec_output
3777        eWalkCode = '''
3778        RegVect srcReg1, srcReg2, destReg;
3779        '''
3780        for reg in range(rCount):
3781            eWalkCode += simdEnabledCheckCode + '''
3782                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3783                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3784            ''' % { "reg" : reg }
3785        eWalkCode += op
3786        for reg in range(rCount):
3787            eWalkCode += '''
3788            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3789            ''' % { "reg" : reg }
3790        iop = InstObjParams(name, Name,
3791                            "RegRegRegImmOp",
3792                            { "code": eWalkCode,
3793                              "r_count": rCount,
3794                              "predicate_test": predicateTest,
3795                              "op_class": opClass }, [])
3796        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3797        exec_output += NeonEqualRegExecute.subst(iop)
3798        for type in types:
3799            substDict = { "targs" : type,
3800                          "class_name" : Name }
3801            exec_output += NeonExecDeclare.subst(substDict)
3802
    # C++ element loop passed to buildVext as the "op" text for both
    # VEXT classes below.  For each destination element i it computes
    # index = i + imm:
    #   - index < eCount            -> element taken from srcReg1
    #   - eCount <= index < 2*eCount -> element taken from srcReg2
    #   - otherwise                 -> fault is set to a new
    #                                  UndefinedInstruction
    # The loop has no break, so it keeps iterating after fault is set.
    vextCode = '''
        for (unsigned i = 0; i < eCount; i++) {
            unsigned index = i + imm;
            if (index < eCount) {
                destReg.elements[i] = srcReg1.elements[index];
            } else {
                index -= eCount;
                if (index >= eCount) {
                    fault = new UndefinedInstruction(machInst, false, mnemonic);
                } else {
                    destReg.elements[i] = srcReg2.elements[index];
                }
            }
        }
    '''
    # Both classes share the "vext" mnemonic and are built for uint8_t
    # only; NVextD passes rCount = 2 and NVextQ passes rCount = 4.
    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3820
3821    def buildVtbxl(name, Name, opClass, length, isVtbl):
3822        global header_output, decoder_output, exec_output
3823        code = '''
3824            union
3825            {
3826                uint8_t bytes[32];
3827                FloatRegBits regs[8];
3828            } table;
3829
3830            union
3831            {
3832                uint8_t bytes[8];
3833                FloatRegBits regs[2];
3834            } destReg, srcReg2;
3835
3836            const unsigned length = %(length)d;
3837            const bool isVtbl = %(isVtbl)s;
3838
3839            srcReg2.regs[0] = htog(FpOp2P0_uw);
3840            srcReg2.regs[1] = htog(FpOp2P1_uw);
3841
3842            destReg.regs[0] = htog(FpDestP0_uw);
3843            destReg.regs[1] = htog(FpDestP1_uw);
3844        ''' % { "length" : length, "isVtbl" : isVtbl }
3845        for reg in range(8):
3846            if reg < length * 2:
3847                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3848                        { "reg" : reg }
3849            else:
3850                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3851        code += '''
3852        for (unsigned i = 0; i < sizeof(destReg); i++) {
3853            uint8_t index = srcReg2.bytes[i];
3854            if (index < 8 * length) {
3855                destReg.bytes[i] = table.bytes[index];
3856            } else {
3857                if (isVtbl)
3858                    destReg.bytes[i] = 0;
3859                // else destReg.bytes[i] unchanged
3860            }
3861        }
3862
3863        FpDestP0_uw = gtoh(destReg.regs[0]);
3864        FpDestP1_uw = gtoh(destReg.regs[1]);
3865        '''
3866        iop = InstObjParams(name, Name,
3867                            "RegRegRegOp",
3868                            { "code": code,
3869                              "predicate_test": predicateTest,
3870                              "op_class": opClass }, [])
3871        header_output += RegRegRegOpDeclare.subst(iop)
3872        decoder_output += RegRegRegOpConstructor.subst(iop)
3873        exec_output += PredOpExecute.subst(iop)
3874
    # "vtbl" classes for length 1 through 4.  isVtbl is "true", so a
    # destination byte whose index is out of range is written as zero.
    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")

    # "vtbx" classes for the same four lengths.  isVtbl is "false", so a
    # destination byte whose index is out of range keeps its old value.
    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3884}};
3885