neon.isa revision 11443
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061let {{
    # Output accumulators: the generator functions defined below append
    # their substituted templates to these two strings.
    header_output = ""
    exec_output = ""

    # C++ source for the float compare helpers.  Each returns 0.0 when the
    # comparison holds and 1.0 when it does not; 2.0 is returned when the
    # NaN test on either operand is true (std::isnan for vcgtFunc and
    # vcgeFunc, isSnan for vceqFunc).
    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    # Less-or-equal / less-than helpers following the same 0.0 / 1.0 / 2.0
    # convention.  NOTE(review): this string is not part of the
    # concatenation appended to exec_output below - confirm it is emitted
    # elsewhere if vcleFunc/vcltFunc are needed.
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    # Helpers that compare the fabsf() magnitudes of the operands, again
    # returning 2.0 when std::isnan is true for either one.
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    # Only vcompares and vacomparesG are emitted here.
    exec_output += vcompares + vacomparesG

    # Tuples of C++ integer type names, grouped by signedness and by whether
    # the 64-bit type is included (presumably used as the `types` argument
    # of the generator functions below - confirm against the callers).
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes
1133
1134    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135                          readDest=False, pairwise=False):
1136        global header_output, exec_output
1137        eWalkCode = simdEnabledCheckCode + '''
1138        RegVect srcReg1, srcReg2, destReg;
1139        '''
1140        for reg in range(rCount):
1141            eWalkCode += '''
1142                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1143                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1144            ''' % { "reg" : reg }
1145            if readDest:
1146                eWalkCode += '''
1147                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1148                ''' % { "reg" : reg }
1149        readDestCode = ''
1150        if readDest:
1151            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1152        if pairwise:
1153            eWalkCode += '''
1154            for (unsigned i = 0; i < eCount; i++) {
1155                Element srcElem1 = gtoh(2 * i < eCount ?
1156                                        srcReg1.elements[2 * i] :
1157                                        srcReg2.elements[2 * i - eCount]);
1158                Element srcElem2 = gtoh(2 * i < eCount ?
1159                                        srcReg1.elements[2 * i + 1] :
1160                                        srcReg2.elements[2 * i + 1 - eCount]);
1161                Element destElem;
1162                %(readDest)s
1163                %(op)s
1164                destReg.elements[i] = htog(destElem);
1165            }
1166            ''' % { "op" : op, "readDest" : readDestCode }
1167        else:
1168            eWalkCode += '''
1169            for (unsigned i = 0; i < eCount; i++) {
1170                Element srcElem1 = gtoh(srcReg1.elements[i]);
1171                Element srcElem2 = gtoh(srcReg2.elements[i]);
1172                Element destElem;
1173                %(readDest)s
1174                %(op)s
1175                destReg.elements[i] = htog(destElem);
1176            }
1177            ''' % { "op" : op, "readDest" : readDestCode }
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1181            ''' % { "reg" : reg }
1182        iop = InstObjParams(name, Name,
1183                            "RegRegRegOp",
1184                            { "code": eWalkCode,
1185                              "r_count": rCount,
1186                              "predicate_test": predicateTest,
1187                              "op_class": opClass }, [])
1188        header_output += NeonRegRegRegOpDeclare.subst(iop)
1189        exec_output += NeonEqualRegExecute.subst(iop)
1190        for type in types:
1191            substDict = { "targs" : type,
1192                          "class_name" : Name }
1193            exec_output += NeonExecDeclare.subst(substDict)
1194
1195    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1196                            readDest=False, pairwise=False, toInt=False):
1197        global header_output, exec_output
1198        eWalkCode = simdEnabledCheckCode + '''
1199        typedef FloatReg FloatVect[rCount];
1200        FloatVect srcRegs1, srcRegs2;
1201        '''
1202        if toInt:
1203            eWalkCode += 'RegVect destRegs;\n'
1204        else:
1205            eWalkCode += 'FloatVect destRegs;\n'
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1209                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1210            ''' % { "reg" : reg }
1211            if readDest:
1212                if toInt:
1213                    eWalkCode += '''
1214                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1215                    ''' % { "reg" : reg }
1216                else:
1217                    eWalkCode += '''
1218                        destRegs[%(reg)d] = FpDestP%(reg)d;
1219                    ''' % { "reg" : reg }
1220        readDestCode = ''
1221        if readDest:
1222            readDestCode = 'destReg = destRegs[r];'
1223        destType = 'FloatReg'
1224        writeDest = 'destRegs[r] = destReg;'
1225        if toInt:
1226            destType = 'FloatRegBits'
1227            writeDest = 'destRegs.regs[r] = destReg;'
1228        if pairwise:
1229            eWalkCode += '''
1230            for (unsigned r = 0; r < rCount; r++) {
1231                FloatReg srcReg1 = (2 * r < rCount) ?
1232                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1233                FloatReg srcReg2 = (2 * r < rCount) ?
1234                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1235                %(destType)s destReg;
1236                %(readDest)s
1237                %(op)s
1238                %(writeDest)s
1239            }
1240            ''' % { "op" : op,
1241                    "readDest" : readDestCode,
1242                    "destType" : destType,
1243                    "writeDest" : writeDest }
1244        else:
1245            eWalkCode += '''
1246            for (unsigned r = 0; r < rCount; r++) {
1247                FloatReg srcReg1 = srcRegs1[r];
1248                FloatReg srcReg2 = srcRegs2[r];
1249                %(destType)s destReg;
1250                %(readDest)s
1251                %(op)s
1252                %(writeDest)s
1253            }
1254            ''' % { "op" : op,
1255                    "readDest" : readDestCode,
1256                    "destType" : destType,
1257                    "writeDest" : writeDest }
1258        for reg in range(rCount):
1259            if toInt:
1260                eWalkCode += '''
1261                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1262                ''' % { "reg" : reg }
1263            else:
1264                eWalkCode += '''
1265                FpDestP%(reg)d = destRegs[%(reg)d];
1266                ''' % { "reg" : reg }
1267        iop = InstObjParams(name, Name,
1268                            "FpRegRegRegOp",
1269                            { "code": eWalkCode,
1270                              "r_count": rCount,
1271                              "predicate_test": predicateTest,
1272                              "op_class": opClass }, [])
1273        header_output += NeonRegRegRegOpDeclare.subst(iop)
1274        exec_output += NeonEqualRegExecute.subst(iop)
1275        for type in types:
1276            substDict = { "targs" : type,
1277                          "class_name" : Name }
1278            exec_output += NeonExecDeclare.subst(substDict)
1279
1280    def threeUnequalRegInst(name, Name, opClass, types, op,
1281                            bigSrc1, bigSrc2, bigDest, readDest):
1282        global header_output, exec_output
1283        src1Cnt = src2Cnt = destCnt = 2
1284        src1Prefix = src2Prefix = destPrefix = ''
1285        if bigSrc1:
1286            src1Cnt = 4
1287            src1Prefix = 'Big'
1288        if bigSrc2:
1289            src2Cnt = 4
1290            src2Prefix = 'Big'
1291        if bigDest:
1292            destCnt = 4
1293            destPrefix = 'Big'
1294        eWalkCode = simdEnabledCheckCode + '''
1295            %sRegVect srcReg1;
1296            %sRegVect srcReg2;
1297            %sRegVect destReg;
1298        ''' % (src1Prefix, src2Prefix, destPrefix)
1299        for reg in range(src1Cnt):
1300            eWalkCode += '''
1301                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1302            ''' % { "reg" : reg }
1303        for reg in range(src2Cnt):
1304            eWalkCode += '''
1305                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1306            ''' % { "reg" : reg }
1307        if readDest:
1308            for reg in range(destCnt):
1309                eWalkCode += '''
1310                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1311                ''' % { "reg" : reg }
1312        readDestCode = ''
1313        if readDest:
1314            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1315        eWalkCode += '''
1316        for (unsigned i = 0; i < eCount; i++) {
1317            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1318            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1319            %(destPrefix)sElement destElem;
1320            %(readDest)s
1321            %(op)s
1322            destReg.elements[i] = htog(destElem);
1323        }
1324        ''' % { "op" : op, "readDest" : readDestCode,
1325                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1326                "destPrefix" : destPrefix }
1327        for reg in range(destCnt):
1328            eWalkCode += '''
1329            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1330            ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "RegRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": 2,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegRegOpDeclare.subst(iop)
1338        exec_output += NeonUnequalRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1345        threeUnequalRegInst(name, Name, opClass, types, op,
1346                            True, True, False, readDest)
1347
1348    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1349        threeUnequalRegInst(name, Name, opClass, types, op,
1350                            False, False, True, readDest)
1351
1352    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1353        threeUnequalRegInst(name, Name, opClass, types, op,
1354                            True, False, True, readDest)
1355
1356    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = simdEnabledCheckCode + '''
1359        RegVect srcReg1, srcReg2, destReg;
1360        '''
1361        for reg in range(rCount):
1362            eWalkCode += '''
1363                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1364                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1365            ''' % { "reg" : reg }
1366            if readDest:
1367                eWalkCode += '''
1368                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1369                ''' % { "reg" : reg }
1370        readDestCode = ''
1371        if readDest:
1372            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1373        eWalkCode += '''
1374        if (imm < 0 && imm >= eCount) {
1375            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1376                                                           mnemonic);
1377        } else {
1378            for (unsigned i = 0; i < eCount; i++) {
1379                Element srcElem1 = gtoh(srcReg1.elements[i]);
1380                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1381                Element destElem;
1382                %(readDest)s
1383                %(op)s
1384                destReg.elements[i] = htog(destElem);
1385            }
1386        }
1387        ''' % { "op" : op, "readDest" : readDestCode }
1388        for reg in range(rCount):
1389            eWalkCode += '''
1390            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1391            ''' % { "reg" : reg }
1392        iop = InstObjParams(name, Name,
1393                            "RegRegRegImmOp",
1394                            { "code": eWalkCode,
1395                              "r_count": rCount,
1396                              "predicate_test": predicateTest,
1397                              "op_class": opClass }, [])
1398        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1399        exec_output += NeonEqualRegExecute.subst(iop)
1400        for type in types:
1401            substDict = { "targs" : type,
1402                          "class_name" : Name }
1403            exec_output += NeonExecDeclare.subst(substDict)
1404
1405    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1406        global header_output, exec_output
1407        rCount = 2
1408        eWalkCode = simdEnabledCheckCode + '''
1409        RegVect srcReg1, srcReg2;
1410        BigRegVect destReg;
1411        '''
1412        for reg in range(rCount):
1413            eWalkCode += '''
1414                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1415                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1416            ''' % { "reg" : reg }
1417        if readDest:
1418            for reg in range(2 * rCount):
1419                eWalkCode += '''
1420                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1421                ''' % { "reg" : reg }
1422        readDestCode = ''
1423        if readDest:
1424            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1425        eWalkCode += '''
1426        if (imm < 0 && imm >= eCount) {
1427            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1428                                                          mnemonic);
1429        } else {
1430            for (unsigned i = 0; i < eCount; i++) {
1431                Element srcElem1 = gtoh(srcReg1.elements[i]);
1432                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1433                BigElement destElem;
1434                %(readDest)s
1435                %(op)s
1436                destReg.elements[i] = htog(destElem);
1437            }
1438        }
1439        ''' % { "op" : op, "readDest" : readDestCode }
1440        for reg in range(2 * rCount):
1441            eWalkCode += '''
1442            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1443            ''' % { "reg" : reg }
1444        iop = InstObjParams(name, Name,
1445                            "RegRegRegImmOp",
1446                            { "code": eWalkCode,
1447                              "r_count": rCount,
1448                              "predicate_test": predicateTest,
1449                              "op_class": opClass }, [])
1450        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1451        exec_output += NeonUnequalRegExecute.subst(iop)
1452        for type in types:
1453            substDict = { "targs" : type,
1454                          "class_name" : Name }
1455            exec_output += NeonExecDeclare.subst(substDict)
1456
1457    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1458        global header_output, exec_output
1459        eWalkCode = simdEnabledCheckCode + '''
1460        typedef FloatReg FloatVect[rCount];
1461        FloatVect srcRegs1, srcRegs2, destRegs;
1462        '''
1463        for reg in range(rCount):
1464            eWalkCode += '''
1465                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1466                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1467            ''' % { "reg" : reg }
1468            if readDest:
1469                eWalkCode += '''
1470                    destRegs[%(reg)d] = FpDestP%(reg)d;
1471                ''' % { "reg" : reg }
1472        readDestCode = ''
1473        if readDest:
1474            readDestCode = 'destReg = destRegs[i];'
1475        eWalkCode += '''
1476        if (imm < 0 && imm >= eCount) {
1477            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1478                                                           mnemonic);
1479        } else {
1480            for (unsigned i = 0; i < rCount; i++) {
1481                FloatReg srcReg1 = srcRegs1[i];
1482                FloatReg srcReg2 = srcRegs2[imm];
1483                FloatReg destReg;
1484                %(readDest)s
1485                %(op)s
1486                destRegs[i] = destReg;
1487            }
1488        }
1489        ''' % { "op" : op, "readDest" : readDestCode }
1490        for reg in range(rCount):
1491            eWalkCode += '''
1492            FpDestP%(reg)d = destRegs[%(reg)d];
1493            ''' % { "reg" : reg }
1494        iop = InstObjParams(name, Name,
1495                            "FpRegRegRegImmOp",
1496                            { "code": eWalkCode,
1497                              "r_count": rCount,
1498                              "predicate_test": predicateTest,
1499                              "op_class": opClass }, [])
1500        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1501        exec_output += NeonEqualRegExecute.subst(iop)
1502        for type in types:
1503            substDict = { "targs" : type,
1504                          "class_name" : Name }
1505            exec_output += NeonExecDeclare.subst(substDict)
1506
1507    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1508            readDest=False, toInt=False, fromInt=False):
1509        global header_output, exec_output
1510        eWalkCode = simdEnabledCheckCode + '''
1511        RegVect srcRegs1, destRegs;
1512        '''
1513        for reg in range(rCount):
1514            eWalkCode += '''
1515                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1516            ''' % { "reg" : reg }
1517            if readDest:
1518                eWalkCode += '''
1519                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1520                ''' % { "reg" : reg }
1521        readDestCode = ''
1522        if readDest:
1523            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1524            if toInt:
1525                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1526        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1527        if fromInt:
1528            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1529        declDest = 'Element destElem;'
1530        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1531        if toInt:
1532            declDest = 'FloatRegBits destReg;'
1533            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1534        eWalkCode += '''
1535        for (unsigned i = 0; i < eCount; i++) {
1536            %(readOp)s
1537            %(declDest)s
1538            %(readDest)s
1539            %(op)s
1540            %(writeDest)s
1541        }
1542        ''' % { "readOp" : readOpCode,
1543                "declDest" : declDest,
1544                "readDest" : readDestCode,
1545                "op" : op,
1546                "writeDest" : writeDestCode }
1547        for reg in range(rCount):
1548            eWalkCode += '''
1549            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1550            ''' % { "reg" : reg }
1551        iop = InstObjParams(name, Name,
1552                            "RegRegImmOp",
1553                            { "code": eWalkCode,
1554                              "r_count": rCount,
1555                              "predicate_test": predicateTest,
1556                              "op_class": opClass }, [])
1557        header_output += NeonRegRegImmOpDeclare.subst(iop)
1558        exec_output += NeonEqualRegExecute.subst(iop)
1559        for type in types:
1560            substDict = { "targs" : type,
1561                          "class_name" : Name }
1562            exec_output += NeonExecDeclare.subst(substDict)
1563
1564    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1565        global header_output, exec_output
1566        eWalkCode = simdEnabledCheckCode + '''
1567        BigRegVect srcReg1;
1568        RegVect destReg;
1569        '''
1570        for reg in range(4):
1571            eWalkCode += '''
1572                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1573            ''' % { "reg" : reg }
1574        if readDest:
1575            for reg in range(2):
1576                eWalkCode += '''
1577                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1578                ''' % { "reg" : reg }
1579        readDestCode = ''
1580        if readDest:
1581            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1582        eWalkCode += '''
1583        for (unsigned i = 0; i < eCount; i++) {
1584            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1585            Element destElem;
1586            %(readDest)s
1587            %(op)s
1588            destReg.elements[i] = htog(destElem);
1589        }
1590        ''' % { "op" : op, "readDest" : readDestCode }
1591        for reg in range(2):
1592            eWalkCode += '''
1593            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1594            ''' % { "reg" : reg }
1595        iop = InstObjParams(name, Name,
1596                            "RegRegImmOp",
1597                            { "code": eWalkCode,
1598                              "r_count": 2,
1599                              "predicate_test": predicateTest,
1600                              "op_class": opClass }, [])
1601        header_output += NeonRegRegImmOpDeclare.subst(iop)
1602        exec_output += NeonUnequalRegExecute.subst(iop)
1603        for type in types:
1604            substDict = { "targs" : type,
1605                          "class_name" : Name }
1606            exec_output += NeonExecDeclare.subst(substDict)
1607
1608    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1609        global header_output, exec_output
1610        eWalkCode = simdEnabledCheckCode + '''
1611        RegVect srcReg1;
1612        BigRegVect destReg;
1613        '''
1614        for reg in range(2):
1615            eWalkCode += '''
1616                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1617            ''' % { "reg" : reg }
1618        if readDest:
1619            for reg in range(4):
1620                eWalkCode += '''
1621                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1622                ''' % { "reg" : reg }
1623        readDestCode = ''
1624        if readDest:
1625            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1626        eWalkCode += '''
1627        for (unsigned i = 0; i < eCount; i++) {
1628            Element srcElem1 = gtoh(srcReg1.elements[i]);
1629            BigElement destElem;
1630            %(readDest)s
1631            %(op)s
1632            destReg.elements[i] = htog(destElem);
1633        }
1634        ''' % { "op" : op, "readDest" : readDestCode }
1635        for reg in range(4):
1636            eWalkCode += '''
1637            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1638            ''' % { "reg" : reg }
1639        iop = InstObjParams(name, Name,
1640                            "RegRegImmOp",
1641                            { "code": eWalkCode,
1642                              "r_count": 2,
1643                              "predicate_test": predicateTest,
1644                              "op_class": opClass }, [])
1645        header_output += NeonRegRegImmOpDeclare.subst(iop)
1646        exec_output += NeonUnequalRegExecute.subst(iop)
1647        for type in types:
1648            substDict = { "targs" : type,
1649                          "class_name" : Name }
1650            exec_output += NeonExecDeclare.subst(substDict)
1651
1652    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
            # Define a NEON two-register instruction ("RegRegOp" base class) whose
            # source and destination have the same element type and both span
            # rCount register words.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   rCount     -- number of register words read and written
            #   op         -- C++ snippet spliced into the per-lane loop; it is
            #                 expected to assign destElem (srcElem1, i and j are in
            #                 scope)
            #   readDest   -- when true the destination words are loaded first and
            #                 destElem starts out as lane i's previous value
            #
            # Appends to the global header_output / exec_output strings.
1653        global header_output, exec_output
1654        eWalkCode = simdEnabledCheckCode + '''
1655        RegVect srcReg1, destReg;
1656        '''
1657        for reg in range(rCount):
1658            eWalkCode += '''
1659                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1660            ''' % { "reg" : reg }
1661            if readDest:
1662                eWalkCode += '''
1663                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1664                ''' % { "reg" : reg }
1665        readDestCode = ''
1666        if readDest:
1667            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
            # The result is stored at index j, which starts equal to i; presumably
            # this lets an op snippet redirect the destination lane by changing j.
1668        eWalkCode += '''
1669        for (unsigned i = 0; i < eCount; i++) {
1670            unsigned j = i;
1671            Element srcElem1 = gtoh(srcReg1.elements[i]);
1672            Element destElem;
1673            %(readDest)s
1674            %(op)s
1675            destReg.elements[j] = htog(destElem);
1676        }
1677        ''' % { "op" : op, "readDest" : readDestCode }
1678        for reg in range(rCount):
1679            eWalkCode += '''
1680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1681            ''' % { "reg" : reg }
1682        iop = InstObjParams(name, Name,
1683                            "RegRegOp",
1684                            { "code": eWalkCode,
1685                              "r_count": rCount,
1686                              "predicate_test": predicateTest,
1687                              "op_class": opClass }, [])
1688        header_output += NeonRegRegOpDeclare.subst(iop)
1689        exec_output += NeonEqualRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1690        for type in types:
1691            substDict = { "targs" : type,
1692                          "class_name" : Name }
1693            exec_output += NeonExecDeclare.subst(substDict)
1694
1695    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
            # Define a NEON two-register instruction ("RegRegImmOp" base class)
            # whose source is a single scalar lane: every iteration of the lane
            # loop reads srcReg1.elements[imm] rather than lane i.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   rCount     -- number of register words read and written
            #   op         -- C++ snippet spliced into the per-lane loop; it is
            #                 expected to assign destElem (srcElem1 is in scope)
            #   readDest   -- when true the destination words are loaded first and
            #                 destElem starts out as lane i's previous value
            #
            # Appends to the global header_output / exec_output strings.
1696        global header_output, exec_output
1697        eWalkCode = simdEnabledCheckCode + '''
1698        RegVect srcReg1, destReg;
1699        '''
1700        for reg in range(rCount):
1701            eWalkCode += '''
1702                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1703            ''' % { "reg" : reg }
1704            if readDest:
1705                eWalkCode += '''
1706                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1707                ''' % { "reg" : reg }
1708        readDestCode = ''
1709        if readDest:
1710            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
            # The source lane index is 'imm', which this snippet does not declare;
            # presumably it is a member of the RegRegImmOp base class.
1711        eWalkCode += '''
1712        for (unsigned i = 0; i < eCount; i++) {
1713            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1714            Element destElem;
1715            %(readDest)s
1716            %(op)s
1717            destReg.elements[i] = htog(destElem);
1718        }
1719        ''' % { "op" : op, "readDest" : readDestCode }
1720        for reg in range(rCount):
1721            eWalkCode += '''
1722            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1723            ''' % { "reg" : reg }
1724        iop = InstObjParams(name, Name,
1725                            "RegRegImmOp",
1726                            { "code": eWalkCode,
1727                              "r_count": rCount,
1728                              "predicate_test": predicateTest,
1729                              "op_class": opClass }, [])
1730        header_output += NeonRegRegImmOpDeclare.subst(iop)
1731        exec_output += NeonEqualRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1732        for type in types:
1733            substDict = { "targs" : type,
1734                          "class_name" : Name }
1735            exec_output += NeonExecDeclare.subst(substDict)
1736
1737    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
            # Define a NEON two-register instruction ("RegRegOp" base class) in
            # which both registers are inputs and outputs: srcReg1 and destReg are
            # both loaded before op runs and both written back afterwards.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   rCount     -- number of register words in each register
            #   op         -- C++ code appended verbatim; unlike the sibling
            #                 helpers no per-lane loop is wrapped around it
            #   readDest   -- has no effect on the generated logic (see notes below)
            #
            # Appends to the global header_output / exec_output strings.
1738        global header_output, exec_output
1739        eWalkCode = simdEnabledCheckCode + '''
1740        RegVect srcReg1, destReg;
1741        '''
1742        for reg in range(rCount):
1743            eWalkCode += '''
1744                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1745                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1746            ''' % { "reg" : reg }
                # NOTE(review): this branch appends only whitespace, so the
                # destination is loaded unconditionally by the statement above.
1747            if readDest:
1748                eWalkCode += '''
1749                ''' % { "reg" : reg }
            # NOTE(review): readDestCode is assigned but never used in this function.
1750        readDestCode = ''
1751        if readDest:
1752            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1753        eWalkCode += op
            # Both registers are written back, since op may have modified either.
1754        for reg in range(rCount):
1755            eWalkCode += '''
1756            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1757            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1758            ''' % { "reg" : reg }
1759        iop = InstObjParams(name, Name,
1760                            "RegRegOp",
1761                            { "code": eWalkCode,
1762                              "r_count": rCount,
1763                              "predicate_test": predicateTest,
1764                              "op_class": opClass }, [])
1765        header_output += NeonRegRegOpDeclare.subst(iop)
1766        exec_output += NeonEqualRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1767        for type in types:
1768            substDict = { "targs" : type,
1769                          "class_name" : Name }
1770            exec_output += NeonExecDeclare.subst(substDict)
1771
1772    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1773            readDest=False, toInt=False):
1774        global header_output, exec_output
1775        eWalkCode = simdEnabledCheckCode + '''
1776        typedef FloatReg FloatVect[rCount];
1777        FloatVect srcRegs1;
1778        '''
1779        if toInt:
1780            eWalkCode += 'RegVect destRegs;\n'
1781        else:
1782            eWalkCode += 'FloatVect destRegs;\n'
1783        for reg in range(rCount):
1784            eWalkCode += '''
1785                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1786            ''' % { "reg" : reg }
1787            if readDest:
1788                if toInt:
1789                    eWalkCode += '''
1790                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1791                    ''' % { "reg" : reg }
1792                else:
1793                    eWalkCode += '''
1794                        destRegs[%(reg)d] = FpDestP%(reg)d;
1795                    ''' % { "reg" : reg }
1796        readDestCode = ''
1797        if readDest:
1798            readDestCode = 'destReg = destRegs[i];'
1799        destType = 'FloatReg'
1800        writeDest = 'destRegs[r] = destReg;'
1801        if toInt:
1802            destType = 'FloatRegBits'
1803            writeDest = 'destRegs.regs[r] = destReg;'
1804        eWalkCode += '''
1805        for (unsigned r = 0; r < rCount; r++) {
1806            FloatReg srcReg1 = srcRegs1[r];
1807            %(destType)s destReg;
1808            %(readDest)s
1809            %(op)s
1810            %(writeDest)s
1811        }
1812        ''' % { "op" : op,
1813                "readDest" : readDestCode,
1814                "destType" : destType,
1815                "writeDest" : writeDest }
1816        for reg in range(rCount):
1817            if toInt:
1818                eWalkCode += '''
1819                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1820                ''' % { "reg" : reg }
1821            else:
1822                eWalkCode += '''
1823                FpDestP%(reg)d = destRegs[%(reg)d];
1824                ''' % { "reg" : reg }
1825        iop = InstObjParams(name, Name,
1826                            "FpRegRegOp",
1827                            { "code": eWalkCode,
1828                              "r_count": rCount,
1829                              "predicate_test": predicateTest,
1830                              "op_class": opClass }, [])
1831        header_output += NeonRegRegOpDeclare.subst(iop)
1832        exec_output += NeonEqualRegExecute.subst(iop)
1833        for type in types:
1834            substDict = { "targs" : type,
1835                          "class_name" : Name }
1836            exec_output += NeonExecDeclare.subst(substDict)
1837
1838    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
            # Define a NEON two-register instruction ("RegRegOp" base class) that
            # combines adjacent source lanes: lanes 2*i and 2*i+1 (Element) feed
            # one BigElement destination lane i, so the loop runs eCount / 2 times.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   rCount     -- number of register words read and written
            #   op         -- C++ snippet spliced into the loop; it is expected to
            #                 assign destElem (srcElem1 and srcElem2 are in scope)
            #   readDest   -- when true the destination words are loaded first and
            #                 destElem starts out as lane i's previous value
            #
            # Appends to the global header_output / exec_output strings.
1839        global header_output, exec_output
1840        eWalkCode = simdEnabledCheckCode + '''
1841        RegVect srcRegs;
1842        BigRegVect destReg;
1843        '''
1844        for reg in range(rCount):
1845            eWalkCode += '''
1846                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1847            ''' % { "reg" : reg }
1848            if readDest:
1849                eWalkCode += '''
1850                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1851                ''' % { "reg" : reg }
1852        readDestCode = ''
1853        if readDest:
1854            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1855        eWalkCode += '''
1856        for (unsigned i = 0; i < eCount / 2; i++) {
1857            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1858            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1859            BigElement destElem;
1860            %(readDest)s
1861            %(op)s
1862            destReg.elements[i] = htog(destElem);
1863        }
1864        ''' % { "op" : op, "readDest" : readDestCode }
1865        for reg in range(rCount):
1866            eWalkCode += '''
1867            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1868            ''' % { "reg" : reg }
1869        iop = InstObjParams(name, Name,
1870                            "RegRegOp",
1871                            { "code": eWalkCode,
1872                              "r_count": rCount,
1873                              "predicate_test": predicateTest,
1874                              "op_class": opClass }, [])
1875        header_output += NeonRegRegOpDeclare.subst(iop)
1876        exec_output += NeonUnequalRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1877        for type in types:
1878            substDict = { "targs" : type,
1879                          "class_name" : Name }
1880            exec_output += NeonExecDeclare.subst(substDict)
1881
1882    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
            # Define a NEON two-register narrowing instruction on the "RegRegOp"
            # base class: four source register words are viewed as BigElement
            # lanes and each lane produces one Element lane of a two-word
            # destination. The generated walk code matches
            # twoRegNarrowShiftInst; only the base class and declare template
            # differ.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   op         -- C++ snippet spliced into the per-lane loop; it is
            #                 expected to assign destElem (srcElem1 is in scope)
            #   readDest   -- when true the destination words are loaded first and
            #                 destElem starts out as the lane's previous value
            #
            # Appends to the global header_output / exec_output strings.
1883        global header_output, exec_output
1884        eWalkCode = simdEnabledCheckCode + '''
1885        BigRegVect srcReg1;
1886        RegVect destReg;
1887        '''
            # Source side: four register words.
1888        for reg in range(4):
1889            eWalkCode += '''
1890                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1891            ''' % { "reg" : reg }
            # Destination side: only two words, preloaded only on request.
1892        if readDest:
1893            for reg in range(2):
1894                eWalkCode += '''
1895                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1896                ''' % { "reg" : reg }
1897        readDestCode = ''
1898        if readDest:
1899            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1900        eWalkCode += '''
1901        for (unsigned i = 0; i < eCount; i++) {
1902            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1903            Element destElem;
1904            %(readDest)s
1905            %(op)s
1906            destReg.elements[i] = htog(destElem);
1907        }
1908        ''' % { "op" : op, "readDest" : readDestCode }
1909        for reg in range(2):
1910            eWalkCode += '''
1911            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1912            ''' % { "reg" : reg }
1913        iop = InstObjParams(name, Name,
1914                            "RegRegOp",
1915                            { "code": eWalkCode,
1916                              "r_count": 2,
1917                              "predicate_test": predicateTest,
1918                              "op_class": opClass }, [])
1919        header_output += NeonRegRegOpDeclare.subst(iop)
1920        exec_output += NeonUnequalRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1921        for type in types:
1922            substDict = { "targs" : type,
1923                          "class_name" : Name }
1924            exec_output += NeonExecDeclare.subst(substDict)
1925
1926    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
            # Define a NEON one-register instruction ("RegImmOp" base class): there
            # is no source register, op computes each destination lane on its own.
            #
            #   name, Name -- mnemonic and C++ class name handed to InstObjParams
            #   opClass    -- stored under "op_class"
            #   types      -- one NeonExecDeclare instantiation is emitted per entry
            #   rCount     -- number of destination register words
            #   op         -- C++ snippet spliced into the per-lane loop; it is
            #                 expected to assign destElem
            #   readDest   -- when true the destination words are loaded first and
            #                 destElem starts out as lane i's previous value
            #
            # Appends to the global header_output / exec_output strings.
1927        global header_output, exec_output
1928        eWalkCode = simdEnabledCheckCode + '''
1929        RegVect destReg;
1930        '''
1931        if readDest:
1932            for reg in range(rCount):
1933                eWalkCode += '''
1934                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1935                ''' % { "reg" : reg }
1936        readDestCode = ''
1937        if readDest:
1938            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1939        eWalkCode += '''
1940        for (unsigned i = 0; i < eCount; i++) {
1941            Element destElem;
1942            %(readDest)s
1943            %(op)s
1944            destReg.elements[i] = htog(destElem);
1945        }
1946        ''' % { "op" : op, "readDest" : readDestCode }
1947        for reg in range(rCount):
1948            eWalkCode += '''
1949            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1950            ''' % { "reg" : reg }
1951        iop = InstObjParams(name, Name,
1952                            "RegImmOp",
1953                            { "code": eWalkCode,
1954                              "r_count": rCount,
1955                              "predicate_test": predicateTest,
1956                              "op_class": opClass }, [])
1957        header_output += NeonRegImmOpDeclare.subst(iop)
1958        exec_output += NeonEqualRegExecute.subst(iop)
            # One explicit template instantiation per requested element type.
1959        for type in types:
1960            substDict = { "targs" : type,
1961                          "class_name" : Name }
1962            exec_output += NeonExecDeclare.subst(substDict)
1963
1964    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1965        global header_output, exec_output
1966        eWalkCode = simdEnabledCheckCode + '''
1967        RegVect srcReg1;
1968        BigRegVect destReg;
1969        '''
1970        for reg in range(2):
1971            eWalkCode += '''
1972                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1973            ''' % { "reg" : reg }
1974        if readDest:
1975            for reg in range(4):
1976                eWalkCode += '''
1977                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1978                ''' % { "reg" : reg }
1979        readDestCode = ''
1980        if readDest:
1981            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1982        eWalkCode += '''
1983        for (unsigned i = 0; i < eCount; i++) {
1984            Element srcElem1 = gtoh(srcReg1.elements[i]);
1985            BigElement destElem;
1986            %(readDest)s
1987            %(op)s
1988            destReg.elements[i] = htog(destElem);
1989        }
1990        ''' % { "op" : op, "readDest" : readDestCode }
1991        for reg in range(4):
1992            eWalkCode += '''
1993            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1994            ''' % { "reg" : reg }
1995        iop = InstObjParams(name, Name,
1996                            "RegRegOp",
1997                            { "code": eWalkCode,
1998                              "r_count": 2,
1999                              "predicate_test": predicateTest,
2000                              "op_class": opClass }, [])
2001        header_output += NeonRegRegOpDeclare.subst(iop)
2002        exec_output += NeonUnequalRegExecute.subst(iop)
2003        for type in types:
2004            substDict = { "targs" : type,
2005                          "class_name" : Name }
2006            exec_output += NeonExecDeclare.subst(substDict)
2007
2008    vhaddCode = '''
2009        Element carryBit =
2010            (((unsigned)srcElem1 & 0x1) +
2011             ((unsigned)srcElem2 & 0x1)) >> 1;
2012        // Use division instead of a shift to ensure the sign extension works
2013        // right. The compiler will figure out if it can be a shift. Mask the
2014        // inputs so they get truncated correctly.
2015        destElem = (((srcElem1 & ~(Element)1) / 2) +
2016                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2017    '''
        # vhadd (halving add): each operand is halved on its own and the carry
        # out of the two low bits is added back, so the full-width sum is never
        # formed. The D and Q variants share the snippet and differ in the fifth
        # argument (2 vs 4 -- presumably the register-word count).
2018    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2019    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2020
2021    vrhaddCode = '''
2022        Element carryBit =
2023            (((unsigned)srcElem1 & 0x1) +
2024             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2025        // Use division instead of a shift to ensure the sign extension works
2026        // right. The compiler will figure out if it can be a shift. Mask the
2027        // inputs so they get truncated correctly.
2028        destElem = (((srcElem1 & ~(Element)1) / 2) +
2029                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2030    '''
        # vrhadd (rounding halving add): same scheme as vhaddCode, except that 1
        # is added to the low-bit sum before it is shifted, so the carry is set
        # when either low bit is set.
2031    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2032    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2033
2034    vhsubCode = '''
2035        Element barrowBit =
2036            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2037        // Use division instead of a shift to ensure the sign extension works
2038        // right. The compiler will figure out if it can be a shift. Mask the
2039        // inputs so they get truncated correctly.
2040        destElem = (((srcElem1 & ~(Element)1) / 2) -
2041                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2042    '''
        # vhsub (halving subtract): the halves are subtracted and a borrow is
        # taken when the low bit of srcElem1 is clear and that of srcElem2 is set
        # ("barrowBit" in the snippet is that borrow).
2043    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2044    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2045
2046    vandCode = '''
2047        destElem = srcElem1 & srcElem2;
2048    '''
        # Plain per-lane bitwise operations. All of them are registered for
        # unsignedTypes only, in a D form and a Q form (fifth argument 2 vs 4).
2049    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2050    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2051
2052    vbicCode = '''
2053        destElem = srcElem1 & ~srcElem2;
2054    '''
        # vbic: AND with the complement of the second operand.
2055    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2056    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2057
2058    vorrCode = '''
2059        destElem = srcElem1 | srcElem2;
2060    '''
2061    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2062    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2063
        # vmov reuses the OR snippet under its own class names and the
        # "SimdMiscOp" op class.
2064    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2065    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2066
2067    vornCode = '''
2068        destElem = srcElem1 | ~srcElem2;
2069    '''
        # vorn: OR with the complement of the second operand.
2070    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2071    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2072
2073    veorCode = '''
2074        destElem = srcElem1 ^ srcElem2;
2075    '''
2076    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2077    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2078
2079    vbifCode = '''
2080        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2081    '''
        # Bitwise selects. All three snippets read destElem before assigning it,
        # and their registrations pass a trailing True (presumably readDest, so
        # that the old destination value is loaded first).
        # vbif: take srcElem1 bits where srcElem2 is 0, keep destElem elsewhere.
2082    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2083    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
2084    vbitCode = '''
2085        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2086    '''
        # vbit: take srcElem1 bits where srcElem2 is 1, keep destElem elsewhere.
2087    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2088    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
2089    vbslCode = '''
2090        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2091    '''
        # vbsl: the old destElem is the mask -- srcElem1 where it is 1, srcElem2
        # where it is 0.
2092    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2093    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2094
2095    vmaxCode = '''
2096        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2097    '''
        # Per-lane maximum / minimum. Registered for allTypes, so the comparison
        # follows the signedness of the instantiated Element type.
2098    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2099    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2100
2101    vminCode = '''
2102        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2103    '''
2104    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2105    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
2107    vaddCode = '''
2108        destElem = srcElem1 + srcElem2;
2109    '''
        # Per-lane addition in the Element type (no widening or saturation).
2110    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2111    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2112
        # vpadd reuses the add snippet with pairwise=True; only a D form is
        # registered, for smallUnsignedTypes.
2113    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2114                      2, vaddCode, pairwise=True)
2115    vaddlwCode = '''
2116        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2117    '''
        # vaddl / vaddw share one snippet: both operands are cast to BigElement
        # before the add. The long and wide helpers differ in how they declare
        # the operands (defined outside this part of the file).
2118    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2119    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
2120    vaddhnCode = '''
2121        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2122                   (sizeof(Element) * 8);
2123    '''
        # vaddhn: add, then shift right by the bit width of Element so only the
        # upper part of the sum is kept.
2124    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
2125    vraddhnCode = '''
2126        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2127                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2128                   (sizeof(Element) * 8);
2129    '''
        # vraddhn: as vaddhn, but 1 << (Element bits - 1) is added before the
        # shift so the discarded part is rounded rather than truncated.
2130    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2131
2132    vsubCode = '''
2133        destElem = srcElem1 - srcElem2;
2134    '''
        # Per-lane subtraction in the Element type (no widening or saturation).
2135    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2136    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
2137    vsublwCode = '''
2138        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2139    '''
        # vsubl / vsubw share one snippet: both operands are cast to BigElement
        # before the subtraction.
2140    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2141    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2142
2143    vqaddUCode = '''
2144        destElem = srcElem1 + srcElem2;
2145        FPSCR fpscr = (FPSCR) FpscrQc;
2146        if (destElem < srcElem1 || destElem < srcElem2) {
2147            destElem = (Element)(-1);
2148            fpscr.qc = 1;
2149        }
2150        FpscrQc = fpscr;
2151    '''
        # Unsigned saturating add: wrap-around is detected by the sum being
        # smaller than an operand; the lane is then set to all ones and the qc
        # bit is set in the FPSCR value that is written back to FpscrQc.
2152    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2153    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
2154    vsubhnCode = '''
2155        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2156                   (sizeof(Element) * 8);
2157    '''
        # vsubhn: subtract, then shift right by the bit width of Element so only
        # the upper part of the difference is kept.
2158    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
2159    vrsubhnCode = '''
2160        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2161                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2162                   (sizeof(Element) * 8);
2163    '''
        # vrsubhn: as vsubhn, but 1 << (Element bits - 1) is added before the
        # shift so the discarded part is rounded rather than truncated.
2164    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
2166    vqaddSCode = '''
2167        destElem = srcElem1 + srcElem2;
2168        FPSCR fpscr = (FPSCR) FpscrQc;
2169        bool negDest = (destElem < 0);
2170        bool negSrc1 = (srcElem1 < 0);
2171        bool negSrc2 = (srcElem2 < 0);
2172        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2173            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2174            if (negDest)
2175                destElem -= 1;
2176            fpscr.qc = 1;
2177        }
2178        FpscrQc = fpscr;
2179    '''
        # Signed saturating add: overflow is flagged when both operands have the
        # same sign and the sum's sign differs. The lane is then set to the value
        # with only the top bit set, minus one if the wrapped sum came out
        # negative, and qc is set.
2180    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2181    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2182
2183    vqsubUCode = '''
2184        destElem = srcElem1 - srcElem2;
2185        FPSCR fpscr = (FPSCR) FpscrQc;
2186        if (destElem > srcElem1) {
2187            destElem = 0;
2188            fpscr.qc = 1;
2189        }
2190        FpscrQc = fpscr;
2191    '''
        # Unsigned saturating subtract: a difference larger than the minuend
        # means the subtraction wrapped; the lane is then clamped to 0 and qc is
        # set.
2192    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2193    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2194
2195    vqsubSCode = '''
2196        destElem = srcElem1 - srcElem2;
2197        FPSCR fpscr = (FPSCR) FpscrQc;
2198        bool negDest = (destElem < 0);
2199        bool negSrc1 = (srcElem1 < 0);
2200        bool posSrc2 = (srcElem2 >= 0);
2201        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2202            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2203            if (negDest)
2204                destElem -= 1;
2205            fpscr.qc = 1;
2206        }
2207        FpscrQc = fpscr;
2208    '''
        # Signed saturating subtract: overflow is flagged when the operands have
        # opposite signs (negSrc1 == posSrc2) and the difference's sign differs
        # from srcElem1's. The saturation value is built the same way as in
        # vqaddSCode.
2209    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2210    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2211
2212    vcgtCode = '''
2213        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2214    '''
        # Per-lane compares: a true result is written as (Element)(-1), i.e. all
        # bits set, and a false result as 0.
2215    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2216    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2217
2218    vcgeCode = '''
2219        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2220    '''
2221    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2222    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2223
2224    vceqCode = '''
2225        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2226    '''
        # Equality is registered for unsignedTypes only, unlike the ordered
        # compares above which use allTypes.
2227    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2228    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2229
2230    vshlCode = '''
2231        int16_t shiftAmt = (int8_t)srcElem2;
2232        if (shiftAmt < 0) {
2233            shiftAmt = -shiftAmt;
2234            if (shiftAmt >= sizeof(Element) * 8) {
2235                shiftAmt = sizeof(Element) * 8 - 1;
2236                destElem = 0;
2237            } else {
2238                destElem = (srcElem1 >> shiftAmt);
2239            }
2240            // Make sure the right shift sign extended when it should.
2241            if (ltz(srcElem1) && !ltz(destElem)) {
2242                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2243                                             1 - shiftAmt));
2244            }
2245        } else {
2246            if (shiftAmt >= sizeof(Element) * 8) {
2247                destElem = 0;
2248            } else {
2249                destElem = srcElem1 << shiftAmt;
2250            }
2251        }
2252    '''
        # Register-controlled shift: the amount is srcElem2 truncated to a signed
        # byte. A negative amount shifts right by its magnitude (with the sign
        # bits patched in by hand for negative inputs); otherwise the lane is
        # shifted left. Shifts of the full element width or more give 0 before
        # that sign fix-up.
2253    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2254    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2255
2256    vrshlCode = '''
2257        int16_t shiftAmt = (int8_t)srcElem2;
2258        if (shiftAmt < 0) {
2259            shiftAmt = -shiftAmt;
2260            Element rBit = 0;
2261            if (shiftAmt <= sizeof(Element) * 8)
2262                rBit = bits(srcElem1, shiftAmt - 1);
2263            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2264                rBit = 1;
2265            if (shiftAmt >= sizeof(Element) * 8) {
2266                shiftAmt = sizeof(Element) * 8 - 1;
2267                destElem = 0;
2268            } else {
2269                destElem = (srcElem1 >> shiftAmt);
2270            }
2271            // Make sure the right shift sign extended when it should.
2272            if (ltz(srcElem1) && !ltz(destElem)) {
2273                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2274                                             1 - shiftAmt));
2275            }
2276            destElem += rBit;
2277        } else if (shiftAmt > 0) {
2278            if (shiftAmt >= sizeof(Element) * 8) {
2279                destElem = 0;
2280            } else {
2281                destElem = srcElem1 << shiftAmt;
2282            }
2283        } else {
2284            destElem = srcElem1;
2285        }
2286    '''
        # Rounding variant of vshlCode: on a right shift, rBit (the last bit
        # shifted out, or 1 for a negative input shifted by more than the element
        # width) is added to the shifted result.
        # NOTE(review): registered with "SimdAluOp" while vshl uses "SimdShiftOp"
        # -- confirm this is intended.
2287    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2288    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2289
2290    vqshlUCode = '''
2291        int16_t shiftAmt = (int8_t)srcElem2;
2292        FPSCR fpscr = (FPSCR) FpscrQc;
2293        if (shiftAmt < 0) {
2294            shiftAmt = -shiftAmt;
2295            if (shiftAmt >= sizeof(Element) * 8) {
2296                shiftAmt = sizeof(Element) * 8 - 1;
2297                destElem = 0;
2298            } else {
2299                destElem = (srcElem1 >> shiftAmt);
2300            }
2301        } else if (shiftAmt > 0) {
2302            if (shiftAmt >= sizeof(Element) * 8) {
2303                if (srcElem1 != 0) {
2304                    destElem = mask(sizeof(Element) * 8);
2305                    fpscr.qc = 1;
2306                } else {
2307                    destElem = 0;
2308                }
2309            } else {
2310                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2311                            sizeof(Element) * 8 - shiftAmt)) {
2312                    destElem = mask(sizeof(Element) * 8);
2313                    fpscr.qc = 1;
2314                } else {
2315                    destElem = srcElem1 << shiftAmt;
2316                }
2317            }
2318        } else {
2319            destElem = srcElem1;
2320        }
2321        FpscrQc = fpscr;
2322    '''
        # Unsigned saturating shift: right shifts (negative amounts) never
        # saturate. On a left shift, if any of the top shiftAmt bits of the input
        # is set (or the input is non-zero and the shift covers the whole
        # element), the lane is set to mask(element bits) and qc is set.
2323    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2324    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2325
2326    vqshlSCode = '''
2327        int16_t shiftAmt = (int8_t)srcElem2;
2328        FPSCR fpscr = (FPSCR) FpscrQc;
2329        if (shiftAmt < 0) {
2330            shiftAmt = -shiftAmt;
2331            if (shiftAmt >= sizeof(Element) * 8) {
2332                shiftAmt = sizeof(Element) * 8 - 1;
2333                destElem = 0;
2334            } else {
2335                destElem = (srcElem1 >> shiftAmt);
2336            }
2337            // Make sure the right shift sign extended when it should.
2338            if (srcElem1 < 0 && destElem >= 0) {
2339                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2340                                             1 - shiftAmt));
2341            }
2342        } else if (shiftAmt > 0) {
2343            bool sat = false;
2344            if (shiftAmt >= sizeof(Element) * 8) {
2345                if (srcElem1 != 0)
2346                    sat = true;
2347                else
2348                    destElem = 0;
2349            } else {
2350                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2351                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2352                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2353                    sat = true;
2354                } else {
2355                    destElem = srcElem1 << shiftAmt;
2356                }
2357            }
2358            if (sat) {
2359                fpscr.qc = 1;
2360                destElem = mask(sizeof(Element) * 8 - 1);
2361                if (srcElem1 < 0)
2362                    destElem = ~destElem;
2363            }
2364        } else {
2365            destElem = srcElem1;
2366        }
2367        FpscrQc = fpscr;
2368    '''
        # Signed saturating shift: right shifts are sign-extended by hand. A left
        # shift saturates unless the top shiftAmt + 1 bits of the input all equal
        # the sign; the saturated value is mask(element bits - 1), complemented
        # for negative inputs, and qc is set.
        # NOTE(review): these use "SimdCmpOp" while the unsigned vqshl variants
        # use "SimdAluOp" -- confirm this is intended.
2369    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2370    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2371
2372    vqrshlUCode = '''
            // Unsigned saturating rounding shift by a per-element register
            // count.  The low byte of srcElem2 is read as a signed count:
            // negative counts shift right and add the last bit shifted out
            // (rBit) as rounding, positive counts shift left and saturate,
            // and a zero count copies the source unchanged.
2373        int16_t shiftAmt = (int8_t)srcElem2;
2374        FPSCR fpscr = (FPSCR) FpscrQc;
2375        if (shiftAmt < 0) {
2376            shiftAmt = -shiftAmt;
2377            Element rBit = 0;
2378            if (shiftAmt <= sizeof(Element) * 8)
2379                rBit = bits(srcElem1, shiftAmt - 1);
2380            if (shiftAmt >= sizeof(Element) * 8) {
2381                shiftAmt = sizeof(Element) * 8 - 1;
2382                destElem = 0;
2383            } else {
2384                destElem = (srcElem1 >> shiftAmt);
2385            }
2386            destElem += rBit;
2387        } else if (shiftAmt > 0) {
2388            if (shiftAmt >= sizeof(Element) * 8) {
                    // Every bit is shifted out: only a zero source survives.
2389                if (srcElem1 != 0) {
2390                    destElem = mask(sizeof(Element) * 8);
2391                    fpscr.qc = 1;
2392                } else {
2393                    destElem = 0;
2394                }
2395            } else {
                    // The top shiftAmt bits are the ones that would be lost.
2396                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2397                            sizeof(Element) * 8 - shiftAmt)) {
2398                    destElem = mask(sizeof(Element) * 8);
2399                    fpscr.qc = 1;
2400                } else {
2401                    destElem = srcElem1 << shiftAmt;
2402                }
2403            }
            } else {
                // A zero count is handled on its own so that the overflow
                // test above never asks for a bit range starting at the
                // element width, which would shift by the full width.
                destElem = srcElem1;
2404        }
2405        FpscrQc = fpscr;
2406    '''
2407    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2408    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2409
2410    vqrshlSCode = '''
            // Signed saturating rounding shift by a per-element register
            // count.  The low byte of srcElem2 is read as a signed count:
            // negative counts shift right with sign fill and add a rounding
            // bit, positive counts shift left and saturate.
2411        int16_t shiftAmt = (int8_t)srcElem2;
2412        FPSCR fpscr = (FPSCR) FpscrQc;
2413        if (shiftAmt < 0) {
2414            shiftAmt = -shiftAmt;
2415            Element rBit = 0;
                // rBit is the last bit shifted out, when that bit exists.
2416            if (shiftAmt <= sizeof(Element) * 8)
2417                rBit = bits(srcElem1, shiftAmt - 1);
                // Past the element width a negative source sign-fills to -1
                // below; forcing rBit to 1 brings the final result to 0.
2418            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2419                rBit = 1;
2420            if (shiftAmt >= sizeof(Element) * 8) {
2421                shiftAmt = sizeof(Element) * 8 - 1;
2422                destElem = 0;
2423            } else {
2424                destElem = (srcElem1 >> shiftAmt);
2425            }
2426            // Make sure the right shift sign extended when it should.
2427            if (srcElem1 < 0 && destElem >= 0) {
2428                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2429                                             1 - shiftAmt));
2430            }
2431            destElem += rBit;
2432        } else if (shiftAmt > 0) {
2433            bool sat = false;
2434            if (shiftAmt >= sizeof(Element) * 8) {
2435                if (srcElem1 != 0)
2436                    sat = true;
2437                else
2438                    destElem = 0;
2439            } else {
                    // The top shiftAmt + 1 bits must all equal the sign bit,
                    // otherwise the shifted value does not fit.
2440                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2441                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2442                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2443                    sat = true;
2444                } else {
2445                    destElem = srcElem1 << shiftAmt;
2446                }
2447            }
2448            if (sat) {
                    // Saturate towards the sign of the source and set QC.
2449                fpscr.qc = 1;
2450                destElem = mask(sizeof(Element) * 8 - 1);
2451                if (srcElem1 < 0)
2452                    destElem = ~destElem;
2453            }
2454        } else {
2455            destElem = srcElem1;
2456        }
2457        FpscrQc = fpscr;
2458    '''
2459    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2460    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2461
2462    vabaCode = '''
            // Absolute difference and accumulate: the larger operand minus
            // the smaller one is added to the existing destination element.
2463        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2464                                            (srcElem2 - srcElem1);
2465    '''
2466    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2467    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
2468    vabalCode = '''
            // Long form of absolute difference and accumulate: both operands
            // are cast to BigElement before subtracting, and the difference
            // is added to the existing destination element.
2469        destElem += (srcElem1 > srcElem2) ?
2470            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2471            ((BigElement)srcElem2 - (BigElement)srcElem1);
2472    '''
2473    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2474
2475    vabdCode = '''
            // Absolute difference: the larger operand minus the smaller one.
2476        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2477                                           (srcElem2 - srcElem1);
2478    '''
2479    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2480    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
2481    vabdlCode = '''
            // Long form of absolute difference: both operands are cast to
            // BigElement before the larger-minus-smaller subtraction.
2482        destElem = (srcElem1 > srcElem2) ?
2483            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2484            ((BigElement)srcElem2 - (BigElement)srcElem1);
2485    '''
2486    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2487
2488    vtstCode = '''
            // Bit test: the result is (Element)(-1) when the two operands
            // have at least one set bit in common, and 0 otherwise.
2489        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2490    '''
2491    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2492    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2493
2494    vmulCode = '''
            // Element-wise product of the two source elements.
2495        destElem = srcElem1 * srcElem2;
2496    '''
2497    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2498    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2499    vmullCode = '''
            // Long multiply: both operands are cast to BigElement first, so
            // the product is formed at the wider type.
2500        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2501    '''
2502    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2503
2504    vmlaCode = '''
            // Multiply-accumulate: adds the product of the two source
            // elements to the existing destination element.
2505        destElem = destElem + srcElem1 * srcElem2;
2506    '''
2507    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2508    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2509    vmlalCode = '''
            // Long multiply-accumulate: the operands are cast to BigElement
            // before multiplying, and the product is added to the existing
            // destination element.
2510        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2511    '''
2512    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2513
2514    vqdmlalCode = '''
            // Saturating doubling multiply-accumulate long.
            // Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to the
            // BigElement range.  Step 2: midElem is added to the existing
            // destination with a second saturation.  Either saturation sets
            // the QC bit.
2515        FPSCR fpscr = (FPSCR) FpscrQc;
2516        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2517        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The doubled product only leaves the BigElement range when both
            // operands are the most negative element value.  A product with
            // half of that value still fits (for 16-bit elements,
            // 2 * -16384 * -32768 is 2^30), so it must not saturate.
2519        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
2522            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2523            fpscr.qc = 1;
2524        }
2525        bool negPreDest = ltz(destElem);
2526        destElem += midElem;
2527        bool negDest = ltz(destElem);
2528        bool negMid = ltz(midElem);
            // Accumulation overflowed if both addends had the same sign and
            // the sum came out with the opposite one.
2529        if (negPreDest == negMid && negMid != negDest) {
2530            destElem = mask(sizeof(BigElement) * 8 - 1);
2531            if (negPreDest)
2532                destElem = ~destElem;
2533            fpscr.qc = 1;
2534        }
2535        FpscrQc = fpscr;
2536    '''
2537    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2538
2539    vqdmlslCode = '''
            // Saturating doubling multiply-subtract long.
            // Step 1: midElem = 2 * srcElem1 * srcElem2, saturated to the
            // BigElement range.  Step 2: midElem is subtracted from the
            // existing destination with a second saturation.  Either
            // saturation sets the QC bit.
2540        FPSCR fpscr = (FPSCR) FpscrQc;
2541        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2542        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The doubled product only leaves the BigElement range when both
            // operands are the most negative element value.  A product with
            // half of that value still fits (for 16-bit elements,
            // 2 * -16384 * -32768 is 2^30), so it must not saturate.
2544        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
2547            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2548            fpscr.qc = 1;
2549        }
2550        bool negPreDest = ltz(destElem);
2551        destElem -= midElem;
2552        bool negDest = ltz(destElem);
2553        bool posMid = ltz((BigElement)-midElem);
            // Subtraction overflowed if the destination and the negated
            // product had the same sign and the result has the opposite one.
2554        if (negPreDest == posMid && posMid != negDest) {
2555            destElem = mask(sizeof(BigElement) * 8 - 1);
2556            if (negPreDest)
2557                destElem = ~destElem;
2558            fpscr.qc = 1;
2559        }
2560        FpscrQc = fpscr;
2561    '''
2562    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2563
2564    vqdmullCode = '''
            // Saturating doubling multiply long: 2 * srcElem1 * srcElem2.
            // When both operands equal the value with only the top element
            // bit set, the result is replaced by the complement of that
            // value shifted up by one element width, and the QC bit is set.
2565        FPSCR fpscr = (FPSCR) FpscrQc;
2566        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2567        if (srcElem1 == srcElem2 &&
2568                srcElem1 == (Element)((Element)1 <<
2569                    (Element)(sizeof(Element) * 8 - 1))) {
2570            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2571            fpscr.qc = 1;
2572        }
2573        FpscrQc = fpscr;
2574    '''
2575    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2576
2577    vmlsCode = '''
            // Multiply-subtract: subtracts the product of the two source
            // elements from the existing destination element.
2578        destElem = destElem - srcElem1 * srcElem2;
2579    '''
2580    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2581    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2582    vmlslCode = '''
            // Long multiply-subtract: the operands are cast to BigElement
            // before multiplying, and the product is subtracted from the
            // existing destination element.
2583        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2584    '''
2585    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2586
2587    vmulpCode = '''
            // Polynomial multiply: for every set bit j of srcElem2, a copy
            // of srcElem1 shifted left by j is XORed into the result, so
            // partial products are combined without carries.
2588        destElem = 0;
2589        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2590            if (bits(srcElem2, j))
2591                destElem ^= srcElem1 << j;
2592        }
2593    '''
2594    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2595    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2596    vmullpCode = '''
            // Long polynomial multiply: same XOR accumulation of shifted
            // copies of srcElem1 per set bit of srcElem2, but srcElem1 is
            // cast to BigElement first so the shifted copies are formed at
            // the wider type.
2597        destElem = 0;
2598        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2599            if (bits(srcElem2, j))
2600                destElem ^= (BigElement)srcElem1 << j;
2601        }
2602    '''
2603    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2604
2605    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
        # The vpmax line above and the vpmin line below pass the vmaxCode and
        # vminCode snippets (bound outside this part of the file) to the same
        # generator with pairwise=True.
2606
2607    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2608
2609    vqdmulhCode = '''
            // Saturating doubling multiply returning the high half: the
            // doubled 64-bit product is shifted right by one element width.
            // When both operands equal the value with only the top element
            // bit set, the result is replaced by the complement of that
            // value and the QC bit is set.
2610        FPSCR fpscr = (FPSCR) FpscrQc;
2611        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2612                   (sizeof(Element) * 8);
2613        if (srcElem1 == srcElem2 &&
2614                srcElem1 == (Element)((Element)1 <<
2615                    (sizeof(Element) * 8 - 1))) {
2616            destElem = ~srcElem1;
2617            fpscr.qc = 1;
2618        }
2619        FpscrQc = fpscr;
2620    '''
2621    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2622    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2623
2624    vqrdmulhCode = '''
            // Saturating rounding doubling multiply returning the high half:
            // a rounding constant of half an element unit is added to the
            // doubled 64-bit product before it is shifted right by one
            // element width.
2625        FPSCR fpscr = (FPSCR) FpscrQc;
2626        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2627                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2628                   (sizeof(Element) * 8);
2629        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
            // The rounded high half only leaves the element range when both
            // operands are the most negative element value.  A product with
            // half of that value is a representable positive number and must
            // be left alone.  The saturated result is always the largest
            // positive element value.
2631        if (srcElem1 == maxNeg && srcElem2 == maxNeg) {
2635            destElem = mask(sizeof(Element) * 8 - 1);
2639            fpscr.qc = 1;
2640        }
2641        FpscrQc = fpscr;
2642    '''
2643    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2644            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2645    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2646            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2647
2648    vmaxfpCode = '''
            // Floating-point maximum.  processNans runs first and reports
            // through the done flag whether it already produced the result.
            // If not, the maximum is computed via binaryOp with fpMax<float>.
            // If it did, the IDC bit is set when flushToZero returns true
            // for the two sources.
            // NOTE(review): presumably done means a NaN operand was seen;
            // confirm against processNans.
2649        FPSCR fpscr = (FPSCR) FpscrExc;
2650        bool done;
2651        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2652        if (!done) {
2653            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2654                               true, true, VfpRoundNearest);
2655        } else if (flushToZero(srcReg1, srcReg2)) {
2656            fpscr.idc = 1;
2657        }
2658        FpscrExc = fpscr;
2659    '''
2660    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2661    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2662
2663    vminfpCode = '''
            // Floating-point minimum.  processNans runs first and reports
            // through the done flag whether it already produced the result.
            // If not, the minimum is computed via binaryOp with fpMin<float>.
            // If it did, the IDC bit is set when flushToZero returns true
            // for the two sources.
            // NOTE(review): presumably done means a NaN operand was seen;
            // confirm against processNans.
2664        FPSCR fpscr = (FPSCR) FpscrExc;
2665        bool done;
2666        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2667        if (!done) {
2668            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2669                               true, true, VfpRoundNearest);
2670        } else if (flushToZero(srcReg1, srcReg2)) {
2671            fpscr.idc = 1;
2672        }
2673        FpscrExc = fpscr;
2674    '''
2675    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2676    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2677
2678    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2679                        2, vmaxfpCode, pairwise=True)
        # All four instantiations in this group pass pairwise=True and reuse
        # the vmaxfpCode / vminfpCode snippets; the 2 and 4 arguments match
        # the D and Q suffixes of the generated class names.
2680    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2681                        4, vmaxfpCode, pairwise=True)
2682
2683    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2684                        2, vminfpCode, pairwise=True)
2685    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2686                        4, vminfpCode, pairwise=True)
2687
2688    vaddfpCode = '''
            // Floating-point add through binaryOp with fpAddS, using
            // VfpRoundNearest; the status register copy updated by binaryOp
            // is written back afterwards.  The same snippet is reused below
            // for the pairwise vpadd instantiations.
2689        FPSCR fpscr = (FPSCR) FpscrExc;
2690        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2691                           true, true, VfpRoundNearest);
2692        FpscrExc = fpscr;
2693    '''
2694    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2695    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2696
2697    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2698                        2, vaddfpCode, pairwise=True)
2699    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2700                        4, vaddfpCode, pairwise=True)
2701
2702    vsubfpCode = '''
            // Floating-point subtract through binaryOp with fpSubS, using
            // VfpRoundNearest; the status register copy updated by binaryOp
            // is written back afterwards.
2703        FPSCR fpscr = (FPSCR) FpscrExc;
2704        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2705                           true, true, VfpRoundNearest);
2706        FpscrExc = fpscr;
2707    '''
2708    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2709    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2710
2711    vmulfpCode = '''
            // Floating-point multiply through binaryOp with fpMulS, using
            // VfpRoundNearest; the status register copy updated by binaryOp
            // is written back afterwards.
2712        FPSCR fpscr = (FPSCR) FpscrExc;
2713        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2714                           true, true, VfpRoundNearest);
2715        FpscrExc = fpscr;
2716    '''
2717    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2718    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2719
2720    vmlafpCode = '''
            // Floating-point multiply-accumulate done as two separate
            // binaryOp steps: the product is formed first (fpMulS) and then
            // added to the existing destination value (fpAddS).
2721        FPSCR fpscr = (FPSCR) FpscrExc;
2722        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2723                             true, true, VfpRoundNearest);
2724        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2725                           true, true, VfpRoundNearest);
2726        FpscrExc = fpscr;
2727    '''
2728    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2729    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2730
2731    vfmafpCode = '''
            // Floating-point multiply-add issued as a single ternaryOp with
            // fpMulAdd<float>; the existing destination value is the third
            // operand.
2732        FPSCR fpscr = (FPSCR) FpscrExc;
2733        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2734                            true, true, VfpRoundNearest);
2735        FpscrExc = fpscr;
2736    '''
2737    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2738    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2739
2740    vfmsfpCode = '''
            // Floating-point multiply-subtract issued as a single ternaryOp
            // with fpMulAdd<float>: the first source is negated before the
            // call, and the existing destination value is the third operand.
2741        FPSCR fpscr = (FPSCR) FpscrExc;
2742        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2743                            true, true, VfpRoundNearest);
2744        FpscrExc = fpscr;
2745    '''
2746    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2747    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2748
2749    vmlsfpCode = '''
            // Floating-point multiply-subtract done as two separate binaryOp
            // steps: the product is formed first (fpMulS) and then
            // subtracted from the existing destination value (fpSubS).
2750        FPSCR fpscr = (FPSCR) FpscrExc;
2751        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2752                             true, true, VfpRoundNearest);
2753        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2754                           true, true, VfpRoundNearest);
2755        FpscrExc = fpscr;
2756    '''
2757    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2758    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2759
2760    vcgtfpCode = '''
            // Floating-point compare using vcgtFunc through binaryOp.  The
            // float it returns is a status code: 0 produces an all-ones
            // result, anything else produces 0, and 2.0 additionally sets
            // the IOC bit.
            // NOTE(review): presumably 0 means the comparison held and 2.0
            // flags a NaN operand; confirm against vcgtFunc.
2761        FPSCR fpscr = (FPSCR) FpscrExc;
2762        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2763                             true, true, VfpRoundNearest);
2764        destReg = (res == 0) ? -1 : 0;
2765        if (res == 2.0)
2766            fpscr.ioc = 1;
2767        FpscrExc = fpscr;
2768    '''
2769    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2770            2, vcgtfpCode, toInt = True)
2771    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2772            4, vcgtfpCode, toInt = True)
2773
2774    vcgefpCode = '''
            // Floating-point compare using vcgeFunc through binaryOp.  The
            // float it returns is a status code: 0 produces an all-ones
            // result, anything else produces 0, and 2.0 additionally sets
            // the IOC bit.
            // NOTE(review): presumably 0 means the comparison held and 2.0
            // flags a NaN operand; confirm against vcgeFunc.
2775        FPSCR fpscr = (FPSCR) FpscrExc;
2776        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2777                             true, true, VfpRoundNearest);
2778        destReg = (res == 0) ? -1 : 0;
2779        if (res == 2.0)
2780            fpscr.ioc = 1;
2781        FpscrExc = fpscr;
2782    '''
2783    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2784            2, vcgefpCode, toInt = True)
2785    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2786            4, vcgefpCode, toInt = True)
2787
2788    vacgtfpCode = '''
            // Floating-point compare using vacgtFunc through binaryOp.  The
            // float it returns is a status code: 0 produces an all-ones
            // result, anything else produces 0, and 2.0 additionally sets
            // the IOC bit.
            // NOTE(review): presumably vacgtFunc compares magnitudes and
            // returns 2.0 for a NaN operand; confirm against vacgtFunc.
2789        FPSCR fpscr = (FPSCR) FpscrExc;
2790        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2791                             true, true, VfpRoundNearest);
2792        destReg = (res == 0) ? -1 : 0;
2793        if (res == 2.0)
2794            fpscr.ioc = 1;
2795        FpscrExc = fpscr;
2796    '''
2797    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2798            2, vacgtfpCode, toInt = True)
2799    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2800            4, vacgtfpCode, toInt = True)
2801
2802    vacgefpCode = '''
            // Floating-point compare using vacgeFunc through binaryOp.  The
            // float it returns is a status code: 0 produces an all-ones
            // result, anything else produces 0, and 2.0 additionally sets
            // the IOC bit.
            // NOTE(review): presumably vacgeFunc compares magnitudes and
            // returns 2.0 for a NaN operand; confirm against vacgeFunc.
2803        FPSCR fpscr = (FPSCR) FpscrExc;
2804        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2805                             true, true, VfpRoundNearest);
2806        destReg = (res == 0) ? -1 : 0;
2807        if (res == 2.0)
2808            fpscr.ioc = 1;
2809        FpscrExc = fpscr;
2810    '''
2811    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2812            2, vacgefpCode, toInt = True)
2813    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2814            4, vacgefpCode, toInt = True)
2815
2816    vceqfpCode = '''
            // Floating-point compare using vceqFunc through binaryOp.  The
            // float it returns is a status code: 0 produces an all-ones
            // result, anything else produces 0, and 2.0 additionally sets
            // the IOC bit.
            // NOTE(review): presumably 0 means the operands compared equal
            // and 2.0 flags a NaN operand; confirm against vceqFunc.
2817        FPSCR fpscr = (FPSCR) FpscrExc;
2818        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2819                             true, true, VfpRoundNearest);
2820        destReg = (res == 0) ? -1 : 0;
2821        if (res == 2.0)
2822            fpscr.ioc = 1;
2823        FpscrExc = fpscr;
2824    '''
2825    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2826            2, vceqfpCode, toInt = True)
2827    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2828            4, vceqfpCode, toInt = True)
2829
2830    vrecpsCode = '''
            // Delegates the whole computation to fpRecpsS through binaryOp,
            // using VfpRoundNearest, then writes the status copy back.
            // NOTE(review): presumably fpRecpsS implements the reciprocal
            // step function; confirm against its definition.
2831        FPSCR fpscr = (FPSCR) FpscrExc;
2832        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2833                           true, true, VfpRoundNearest);
2834        FpscrExc = fpscr;
2835    '''
2836    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2837    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2838
2839    vrsqrtsCode = '''
            // Delegates the whole computation to fpRSqrtsS through binaryOp,
            // using VfpRoundNearest, then writes the status copy back.
            // NOTE(review): presumably fpRSqrtsS implements the reciprocal
            // square root step function; confirm against its definition.
2840        FPSCR fpscr = (FPSCR) FpscrExc;
2841        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2842                           true, true, VfpRoundNearest);
2843        FpscrExc = fpscr;
2844    '''
2845    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2846    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2847
2848    vabdfpCode = '''
            // Floating-point absolute difference: the difference is formed
            // through binaryOp with fpSubS and its sign is then dropped with
            // fabs.
2849        FPSCR fpscr = (FPSCR) FpscrExc;
2850        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2851                             true, true, VfpRoundNearest);
2852        destReg = fabs(mid);
2853        FpscrExc = fpscr;
2854    '''
2855    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2856    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2857
2858    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
        # Every call in this group feeds a snippet that is also used by a
        # three-register instantiation of the same mnemonic into one of the
        # two-register generators (twoEqualRegInst, twoEqualRegInstFp,
        # twoRegLongInst).  The generated class names add an "s" to the stem.
        # NOTE(review): presumably these are the by-scalar encodings; confirm
        # against the generator definitions.
2859    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2860    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2861    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2862    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2863
2864    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2865    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2866    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2867    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2868    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2869
2870    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2871    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2872    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2873    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2874    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2875
2876    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2877    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2878    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2879    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2880    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2881    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2882            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2883    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2884            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2885
2886    vshrCode = '''
            // Shift right by immediate.  A count of at least the source
            // width is resolved by hand (-1 for a negative source, 0
            // otherwise) instead of being passed to the shift operator.
2887        if (imm >= sizeof(srcElem1) * 8) {
2888            if (ltz(srcElem1))
2889                destElem = -1;
2890            else
2891                destElem = 0;
2892        } else {
2893            destElem = srcElem1 >> imm;
2894        }
2895    '''
2896    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2897    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2898
2899    vsraCode = '''
            // Shift right by immediate and accumulate: the shifted source
            // (mid) is added to the existing destination element.  A count
            // of at least the source width yields -1 for a negative source
            // and 0 otherwise.
2900        Element mid;;
2901        if (imm >= sizeof(srcElem1) * 8) {
2902            mid = ltz(srcElem1) ? -1 : 0;
2903        } else {
2904            mid = srcElem1 >> imm;
                // If a negative source came out non-negative, the shift did
                // not sign fill; extend from the position the old sign bit
                // moved to.
2905            if (ltz(srcElem1) && !ltz(mid)) {
2906                mid |= -(mid & ((Element)1 <<
2907                            (sizeof(Element) * 8 - 1 - imm)));
2908            }
2909        }
2910        destElem += mid;
2911    '''
2912    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2913    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2914
2915    vrshrCode = '''
            // Rounding shift right by immediate: the last bit shifted out
            // (rBit) is added to the shifted value.  The shift is split into
            // imm - 1 and then 1 so that a count equal to the source width
            // never becomes a single full-width shift.
2916        if (imm > sizeof(srcElem1) * 8) {
2917            destElem = 0;
2918        } else if (imm) {
2919            Element rBit = bits(srcElem1, imm - 1);
2920            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2921        } else {
2922            destElem = srcElem1;
2923        }
2924    '''
2925    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2926    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2927
2928    vrsraCode = '''
            // Rounding shift right by immediate and accumulate: the rounded,
            // shifted source is added to the existing destination element.
            // A count larger than the source width contributes nothing, and
            // a zero count adds the source unchanged.
2929        if (imm > sizeof(srcElem1) * 8) {
2930            destElem += 0;
2931        } else if (imm) {
                // Split shift: see the imm - 1 then 1 sequence below, which
                // keeps a count equal to the width from being one full shift.
2932            Element rBit = bits(srcElem1, imm - 1);
2933            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2934        } else {
2935            destElem += srcElem1;
2936        }
2937    '''
2938    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2939    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2940
2941    vsriCode = '''
            // Shift right and insert: the shifted source is ORed into the
            // destination after the destination has been ANDed with the
            // complement of mask(width - imm).  A count of at least the
            // element width leaves the destination untouched.
            // NOTE(review): assumes mask(n) yields the low n bits set, so
            // the top imm destination bits are preserved; confirm.
2942        if (imm >= sizeof(Element) * 8) {
2943            destElem = destElem;
2944        } else {
2945            destElem = (srcElem1 >> imm) |
2946                (destElem & ~mask(sizeof(Element) * 8 - imm));
2947        }
2948    '''
2949    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2950    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2951
2952    vshlCode = '''
            // Shift left by immediate.  For a count of at least the element
            // width the shift is performed as width - 1 followed by 1, which
            // avoids a single shift by the full width.
2953        if (imm >= sizeof(Element) * 8) {
2954            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2955        } else {
2956            destElem = srcElem1 << imm;
2957        }
2958    '''
2959    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2960    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2961
2962    vsliCode = '''
            // Shift left and insert: the shifted source is ORed with the
            // destination bits selected by mask(imm).  A count of at least
            // the element width leaves the destination untouched.
            // NOTE(review): assumes mask(n) yields the low n bits set, so
            // the low imm destination bits are preserved; confirm.
2963        if (imm >= sizeof(Element) * 8) {
2964            destElem = destElem;
2965        } else {
2966            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2967        }
2968    '''
2969    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2970    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2971
2972    vqshlCode = '''
            // Signed saturating shift left by immediate.  On overflow the
            // result is the value with only the top bit set for a
            // non-positive source, or its complement for a positive source,
            // and the QC bit is set.
2973        FPSCR fpscr = (FPSCR) FpscrQc;
2974        if (imm >= sizeof(Element) * 8) {
                // Every bit is shifted out: only a zero source survives.
2975            if (srcElem1 != 0) {
2976                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2977                if (srcElem1 > 0)
2978                    destElem = ~destElem;
2979                fpscr.qc = 1;
2980            } else {
2981                destElem = 0;
2982            }
2983        } else if (imm) {
2984            destElem = (srcElem1 << imm);
                // The top imm + 1 source bits must be all zeros or all ones
                // for the shifted value to keep its sign.
2985            uint64_t topBits = bits((uint64_t)srcElem1,
2986                                    sizeof(Element) * 8 - 1,
2987                                    sizeof(Element) * 8 - 1 - imm);
2988            if (topBits != 0 && topBits != mask(imm + 1)) {
2989                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2990                if (srcElem1 > 0)
2991                    destElem = ~destElem;
2992                fpscr.qc = 1;
2993            }
2994        } else {
2995            destElem = srcElem1;
2996        }
2997        FpscrQc = fpscr;
2998    '''
2999    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3000    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3001
3002    vqshluCode = '''
            // Unsigned saturating shift left by immediate.  If any of the
            // top imm source bits is set (or the count reaches the element
            // width with a non-zero source) the result becomes
            // mask(element width) and the QC bit is set.
3003        FPSCR fpscr = (FPSCR) FpscrQc;
3004        if (imm >= sizeof(Element) * 8) {
3005            if (srcElem1 != 0) {
3006                destElem = mask(sizeof(Element) * 8);
3007                fpscr.qc = 1;
3008            } else {
3009                destElem = 0;
3010            }
3011        } else if (imm) {
3012            destElem = (srcElem1 << imm);
                // topBits holds the bits the shift pushes out of the element.
3013            uint64_t topBits = bits((uint64_t)srcElem1,
3014                                    sizeof(Element) * 8 - 1,
3015                                    sizeof(Element) * 8 - imm);
3016            if (topBits != 0) {
3017                destElem = mask(sizeof(Element) * 8);
3018                fpscr.qc = 1;
3019            }
3020        } else {
3021            destElem = srcElem1;
3022        }
3023        FpscrQc = fpscr;
3024    '''
3025    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3026    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3027
3028    vqshlusCode = '''
            // Saturating shift left by immediate for signed sources with an
            // unsigned-style result range: any negative source gives 0, a
            // source whose shifted-out bits are non-zero gives
            // mask(element width), and both cases set the QC bit.
3029        FPSCR fpscr = (FPSCR) FpscrQc;
3030        if (imm >= sizeof(Element) * 8) {
3031            if (srcElem1 < 0) {
3032                destElem = 0;
3033                fpscr.qc = 1;
3034            } else if (srcElem1 > 0) {
3035                destElem = mask(sizeof(Element) * 8);
3036                fpscr.qc = 1;
3037            } else {
3038                destElem = 0;
3039            }
3040        } else if (imm) {
3041            destElem = (srcElem1 << imm);
                // topBits holds the bits the shift pushes out of the element.
3042            uint64_t topBits = bits((uint64_t)srcElem1,
3043                                    sizeof(Element) * 8 - 1,
3044                                    sizeof(Element) * 8 - imm);
3045            if (srcElem1 < 0) {
3046                destElem = 0;
3047                fpscr.qc = 1;
3048            } else if (topBits != 0) {
3049                destElem = mask(sizeof(Element) * 8);
3050                fpscr.qc = 1;
3051            }
3052        } else {
                // Even with a zero count a negative source is clamped to 0.
3053            if (srcElem1 < 0) {
3054                fpscr.qc = 1;
3055                destElem = 0;
3056            } else {
3057                destElem = srcElem1;
3058            }
3059        }
3060        FpscrQc = fpscr;
3061    '''
3062    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3063    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3064
3065    vshrnCode = '''
3066        if (imm >= sizeof(srcElem1) * 8) {
3067            destElem = 0;
3068        } else {
3069            destElem = srcElem1 >> imm;
3070        }
3071    '''
3072    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3073
3074    vrshrnCode = '''
3075        if (imm > sizeof(srcElem1) * 8) {
3076            destElem = 0;
3077        } else if (imm) {
3078            Element rBit = bits(srcElem1, imm - 1);
3079            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3080        } else {
3081            destElem = srcElem1;
3082        }
3083    '''
3084    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3085
3086    vqshrnCode = '''
3087        FPSCR fpscr = (FPSCR) FpscrQc;
3088        if (imm > sizeof(srcElem1) * 8) {
3089            if (srcElem1 != 0 && srcElem1 != -1)
3090                fpscr.qc = 1;
3091            destElem = 0;
3092        } else if (imm) {
3093            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3094            mid |= -(mid & ((BigElement)1 <<
3095                        (sizeof(BigElement) * 8 - 1 - imm)));
3096            if (mid != (Element)mid) {
3097                destElem = mask(sizeof(Element) * 8 - 1);
3098                if (srcElem1 < 0)
3099                    destElem = ~destElem;
3100                fpscr.qc = 1;
3101            } else {
3102                destElem = mid;
3103            }
3104        } else {
3105            destElem = srcElem1;
3106        }
3107        FpscrQc = fpscr;
3108    '''
3109    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3110
3111    vqshrunCode = '''
3112        FPSCR fpscr = (FPSCR) FpscrQc;
3113        if (imm > sizeof(srcElem1) * 8) {
3114            if (srcElem1 != 0)
3115                fpscr.qc = 1;
3116            destElem = 0;
3117        } else if (imm) {
3118            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3119            if (mid != (Element)mid) {
3120                destElem = mask(sizeof(Element) * 8);
3121                fpscr.qc = 1;
3122            } else {
3123                destElem = mid;
3124            }
3125        } else {
3126            destElem = srcElem1;
3127        }
3128        FpscrQc = fpscr;
3129    '''
3130    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3131                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3132
3133    vqshrunsCode = '''
3134        FPSCR fpscr = (FPSCR) FpscrQc;
3135        if (imm > sizeof(srcElem1) * 8) {
3136            if (srcElem1 != 0)
3137                fpscr.qc = 1;
3138            destElem = 0;
3139        } else if (imm) {
3140            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3141            if (bits(mid, sizeof(BigElement) * 8 - 1,
3142                          sizeof(Element) * 8) != 0) {
3143                if (srcElem1 < 0) {
3144                    destElem = 0;
3145                } else {
3146                    destElem = mask(sizeof(Element) * 8);
3147                }
3148                fpscr.qc = 1;
3149            } else {
3150                destElem = mid;
3151            }
3152        } else {
3153            destElem = srcElem1;
3154        }
3155        FpscrQc = fpscr;
3156    '''
3157    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3158                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3159
3160    vqrshrnCode = '''
3161        FPSCR fpscr = (FPSCR) FpscrQc;
3162        if (imm > sizeof(srcElem1) * 8) {
3163            if (srcElem1 != 0 && srcElem1 != -1)
3164                fpscr.qc = 1;
3165            destElem = 0;
3166        } else if (imm) {
3167            BigElement mid = (srcElem1 >> (imm - 1));
3168            uint64_t rBit = mid & 0x1;
3169            mid >>= 1;
3170            mid |= -(mid & ((BigElement)1 <<
3171                        (sizeof(BigElement) * 8 - 1 - imm)));
3172            mid += rBit;
3173            if (mid != (Element)mid) {
3174                destElem = mask(sizeof(Element) * 8 - 1);
3175                if (srcElem1 < 0)
3176                    destElem = ~destElem;
3177                fpscr.qc = 1;
3178            } else {
3179                destElem = mid;
3180            }
3181        } else {
3182            if (srcElem1 != (Element)srcElem1) {
3183                destElem = mask(sizeof(Element) * 8 - 1);
3184                if (srcElem1 < 0)
3185                    destElem = ~destElem;
3186                fpscr.qc = 1;
3187            } else {
3188                destElem = srcElem1;
3189            }
3190        }
3191        FpscrQc = fpscr;
3192    '''
3193    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3194                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3195
3196    vqrshrunCode = '''
3197        FPSCR fpscr = (FPSCR) FpscrQc;
3198        if (imm > sizeof(srcElem1) * 8) {
3199            if (srcElem1 != 0)
3200                fpscr.qc = 1;
3201            destElem = 0;
3202        } else if (imm) {
3203            BigElement mid = (srcElem1 >> (imm - 1));
3204            uint64_t rBit = mid & 0x1;
3205            mid >>= 1;
3206            mid += rBit;
3207            if (mid != (Element)mid) {
3208                destElem = mask(sizeof(Element) * 8);
3209                fpscr.qc = 1;
3210            } else {
3211                destElem = mid;
3212            }
3213        } else {
3214            if (srcElem1 != (Element)srcElem1) {
3215                destElem = mask(sizeof(Element) * 8 - 1);
3216                fpscr.qc = 1;
3217            } else {
3218                destElem = srcElem1;
3219            }
3220        }
3221        FpscrQc = fpscr;
3222    '''
3223    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3224                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3225
3226    vqrshrunsCode = '''
3227        FPSCR fpscr = (FPSCR) FpscrQc;
3228        if (imm > sizeof(srcElem1) * 8) {
3229            if (srcElem1 != 0)
3230                fpscr.qc = 1;
3231            destElem = 0;
3232        } else if (imm) {
3233            BigElement mid = (srcElem1 >> (imm - 1));
3234            uint64_t rBit = mid & 0x1;
3235            mid >>= 1;
3236            mid |= -(mid & ((BigElement)1 <<
3237                            (sizeof(BigElement) * 8 - 1 - imm)));
3238            mid += rBit;
3239            if (bits(mid, sizeof(BigElement) * 8 - 1,
3240                          sizeof(Element) * 8) != 0) {
3241                if (srcElem1 < 0) {
3242                    destElem = 0;
3243                } else {
3244                    destElem = mask(sizeof(Element) * 8);
3245                }
3246                fpscr.qc = 1;
3247            } else {
3248                destElem = mid;
3249            }
3250        } else {
3251            if (srcElem1 < 0) {
3252                fpscr.qc = 1;
3253                destElem = 0;
3254            } else {
3255                destElem = srcElem1;
3256            }
3257        }
3258        FpscrQc = fpscr;
3259    '''
3260    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3261                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3262
3263    vshllCode = '''
3264        if (imm >= sizeof(destElem) * 8) {
3265            destElem = 0;
3266        } else {
3267            destElem = (BigElement)srcElem1 << imm;
3268        }
3269    '''
3270    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3271
3272    vmovlCode = '''
3273        destElem = srcElem1;
3274    '''
3275    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3276
3277    vcvt2ufxCode = '''
3278        FPSCR fpscr = (FPSCR) FpscrExc;
3279        if (flushToZero(srcElem1))
3280            fpscr.idc = 1;
3281        VfpSavedState state = prepFpState(VfpRoundNearest);
3282        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3283        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3284        __asm__ __volatile__("" :: "m" (destReg));
3285        finishVfp(fpscr, state, true);
3286        FpscrExc = fpscr;
3287    '''
3288    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3289            2, vcvt2ufxCode, toInt = True)
3290    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3291            4, vcvt2ufxCode, toInt = True)
3292
3293    vcvt2sfxCode = '''
3294        FPSCR fpscr = (FPSCR) FpscrExc;
3295        if (flushToZero(srcElem1))
3296            fpscr.idc = 1;
3297        VfpSavedState state = prepFpState(VfpRoundNearest);
3298        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3299        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3300        __asm__ __volatile__("" :: "m" (destReg));
3301        finishVfp(fpscr, state, true);
3302        FpscrExc = fpscr;
3303    '''
3304    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3305            2, vcvt2sfxCode, toInt = True)
3306    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3307            4, vcvt2sfxCode, toInt = True)
3308
3309    vcvtu2fpCode = '''
3310        FPSCR fpscr = (FPSCR) FpscrExc;
3311        VfpSavedState state = prepFpState(VfpRoundNearest);
3312        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3313        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3314        __asm__ __volatile__("" :: "m" (destElem));
3315        finishVfp(fpscr, state, true);
3316        FpscrExc = fpscr;
3317    '''
3318    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3319            2, vcvtu2fpCode, fromInt = True)
3320    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3321            4, vcvtu2fpCode, fromInt = True)
3322
3323    vcvts2fpCode = '''
3324        FPSCR fpscr = (FPSCR) FpscrExc;
3325        VfpSavedState state = prepFpState(VfpRoundNearest);
3326        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3327        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3328        __asm__ __volatile__("" :: "m" (destElem));
3329        finishVfp(fpscr, state, true);
3330        FpscrExc = fpscr;
3331    '''
3332    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3333            2, vcvts2fpCode, fromInt = True)
3334    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3335            4, vcvts2fpCode, fromInt = True)
3336
3337    vcvts2hCode = '''
3338        destElem = 0;
3339        FPSCR fpscr = (FPSCR) FpscrExc;
3340        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3341        if (flushToZero(srcFp1))
3342            fpscr.idc = 1;
3343        VfpSavedState state = prepFpState(VfpRoundNearest);
3344        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3345                                : "m" (srcFp1), "m" (destElem));
3346        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3347                              fpscr.ahp, srcFp1);
3348        __asm__ __volatile__("" :: "m" (destElem));
3349        finishVfp(fpscr, state, true);
3350        FpscrExc = fpscr;
3351    '''
3352    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3353
3354    vcvth2sCode = '''
3355        destElem = 0;
3356        FPSCR fpscr = (FPSCR) FpscrExc;
3357        VfpSavedState state = prepFpState(VfpRoundNearest);
3358        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3359                                : "m" (srcElem1), "m" (destElem));
3360        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3361        __asm__ __volatile__("" :: "m" (destElem));
3362        finishVfp(fpscr, state, true);
3363        FpscrExc = fpscr;
3364    '''
3365    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3366
3367    vrsqrteCode = '''
3368        destElem = unsignedRSqrtEstimate(srcElem1);
3369    '''
3370    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3371    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3372
3373    vrsqrtefpCode = '''
3374        FPSCR fpscr = (FPSCR) FpscrExc;
3375        if (flushToZero(srcReg1))
3376            fpscr.idc = 1;
3377        destReg = fprSqrtEstimate(fpscr, srcReg1);
3378        FpscrExc = fpscr;
3379    '''
3380    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3381    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3382
3383    vrecpeCode = '''
3384        destElem = unsignedRecipEstimate(srcElem1);
3385    '''
3386    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3387    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3388
3389    vrecpefpCode = '''
3390        FPSCR fpscr = (FPSCR) FpscrExc;
3391        if (flushToZero(srcReg1))
3392            fpscr.idc = 1;
3393        destReg = fpRecipEstimate(fpscr, srcReg1);
3394        FpscrExc = fpscr;
3395    '''
3396    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3397    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3398
3399    vrev16Code = '''
3400        destElem = srcElem1;
3401        unsigned groupSize = ((1 << 1) / sizeof(Element));
3402        unsigned reverseMask = (groupSize - 1);
3403        j = i ^ reverseMask;
3404    '''
3405    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3406    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3407    vrev32Code = '''
3408        destElem = srcElem1;
3409        unsigned groupSize = ((1 << 2) / sizeof(Element));
3410        unsigned reverseMask = (groupSize - 1);
3411        j = i ^ reverseMask;
3412    '''
3413    twoRegMiscInst("vrev32", "NVrev32D",
3414            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3415    twoRegMiscInst("vrev32", "NVrev32Q",
3416            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3417    vrev64Code = '''
3418        destElem = srcElem1;
3419        unsigned groupSize = ((1 << 3) / sizeof(Element));
3420        unsigned reverseMask = (groupSize - 1);
3421        j = i ^ reverseMask;
3422    '''
3423    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3424    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3425
3426    split('exec')
3427    exec_output += vcompares + vcomparesL
3428
3429    vpaddlCode = '''
3430        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3431    '''
3432    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3433    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3434
3435    vpadalCode = '''
3436        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3437    '''
3438    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3439    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3440
3441    vclsCode = '''
3442        unsigned count = 0;
3443        if (srcElem1 < 0) {
3444            srcElem1 <<= 1;
3445            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3446                count++;
3447                srcElem1 <<= 1;
3448            }
3449        } else {
3450            srcElem1 <<= 1;
3451            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3452                count++;
3453                srcElem1 <<= 1;
3454            }
3455        }
3456        destElem = count;
3457    '''
3458    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3459    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3460
3461    vclzCode = '''
3462        unsigned count = 0;
3463        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3464            count++;
3465            srcElem1 <<= 1;
3466        }
3467        destElem = count;
3468    '''
3469    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3470    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3471
3472    vcntCode = '''
3473        unsigned count = 0;
3474        while (srcElem1 && count < sizeof(Element) * 8) {
3475            count += srcElem1 & 0x1;
3476            srcElem1 >>= 1;
3477        }
3478        destElem = count;
3479    '''
3480
3481    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3482    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3483
3484    vmvnCode = '''
3485        destElem = ~srcElem1;
3486    '''
3487    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3488    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3489
3490    vqabsCode = '''
3491        FPSCR fpscr = (FPSCR) FpscrQc;
3492        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3493            fpscr.qc = 1;
3494            destElem = ~srcElem1;
3495        } else if (srcElem1 < 0) {
3496            destElem = -srcElem1;
3497        } else {
3498            destElem = srcElem1;
3499        }
3500        FpscrQc = fpscr;
3501    '''
3502    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3503    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3504
3505    vqnegCode = '''
3506        FPSCR fpscr = (FPSCR) FpscrQc;
3507        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3508            fpscr.qc = 1;
3509            destElem = ~srcElem1;
3510        } else {
3511            destElem = -srcElem1;
3512        }
3513        FpscrQc = fpscr;
3514    '''
3515    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3516    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3517
3518    vabsCode = '''
3519        if (srcElem1 < 0) {
3520            destElem = -srcElem1;
3521        } else {
3522            destElem = srcElem1;
3523        }
3524    '''
3525
3526    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3527    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3528    vabsfpCode = '''
3529        union
3530        {
3531            uint32_t i;
3532            float f;
3533        } cStruct;
3534        cStruct.f = srcReg1;
3535        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3536        destReg = cStruct.f;
3537    '''
3538    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3539    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3540
3541    vnegCode = '''
3542        destElem = -srcElem1;
3543    '''
3544    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3545    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3546    vnegfpCode = '''
3547        destReg = -srcReg1;
3548    '''
3549    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3550    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3551
3552    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3553    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3554    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3555    vcgtfpCode = '''
3556        FPSCR fpscr = (FPSCR) FpscrExc;
3557        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3558                             true, true, VfpRoundNearest);
3559        destReg = (res == 0) ? -1 : 0;
3560        if (res == 2.0)
3561            fpscr.ioc = 1;
3562        FpscrExc = fpscr;
3563    '''
3564    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3565            2, vcgtfpCode, toInt = True)
3566    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3567            4, vcgtfpCode, toInt = True)
3568
3569    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3570    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3571    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3572    vcgefpCode = '''
3573        FPSCR fpscr = (FPSCR) FpscrExc;
3574        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3575                             true, true, VfpRoundNearest);
3576        destReg = (res == 0) ? -1 : 0;
3577        if (res == 2.0)
3578            fpscr.ioc = 1;
3579        FpscrExc = fpscr;
3580    '''
3581    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3582            2, vcgefpCode, toInt = True)
3583    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3584            4, vcgefpCode, toInt = True)
3585
3586    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3587    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3588    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3589    vceqfpCode = '''
3590        FPSCR fpscr = (FPSCR) FpscrExc;
3591        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3592                             true, true, VfpRoundNearest);
3593        destReg = (res == 0) ? -1 : 0;
3594        if (res == 2.0)
3595            fpscr.ioc = 1;
3596        FpscrExc = fpscr;
3597    '''
3598    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3599            2, vceqfpCode, toInt = True)
3600    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3601            4, vceqfpCode, toInt = True)
3602
3603    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3604    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3605    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3606    vclefpCode = '''
3607        FPSCR fpscr = (FPSCR) FpscrExc;
3608        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3609                             true, true, VfpRoundNearest);
3610        destReg = (res == 0) ? -1 : 0;
3611        if (res == 2.0)
3612            fpscr.ioc = 1;
3613        FpscrExc = fpscr;
3614    '''
3615    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3616            2, vclefpCode, toInt = True)
3617    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3618            4, vclefpCode, toInt = True)
3619
3620    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3621    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3622    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3623    vcltfpCode = '''
3624        FPSCR fpscr = (FPSCR) FpscrExc;
3625        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3626                             true, true, VfpRoundNearest);
3627        destReg = (res == 0) ? -1 : 0;
3628        if (res == 2.0)
3629            fpscr.ioc = 1;
3630        FpscrExc = fpscr;
3631    '''
3632    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3633            2, vcltfpCode, toInt = True)
3634    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3635            4, vcltfpCode, toInt = True)
3636
3637    vswpCode = '''
3638        FloatRegBits mid;
3639        for (unsigned r = 0; r < rCount; r++) {
3640            mid = srcReg1.regs[r];
3641            srcReg1.regs[r] = destReg.regs[r];
3642            destReg.regs[r] = mid;
3643        }
3644    '''
3645    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3646    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3647
3648    vtrnCode = '''
3649        Element mid;
3650        for (unsigned i = 0; i < eCount; i += 2) {
3651            mid = srcReg1.elements[i];
3652            srcReg1.elements[i] = destReg.elements[i + 1];
3653            destReg.elements[i + 1] = mid;
3654        }
3655    '''
3656    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3657            smallUnsignedTypes, 2, vtrnCode)
3658    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3659            smallUnsignedTypes, 4, vtrnCode)
3660
3661    vuzpCode = '''
3662        Element mid[eCount];
3663        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3664        for (unsigned i = 0; i < eCount / 2; i++) {
3665            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3666            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3667            destReg.elements[i] = destReg.elements[2 * i];
3668        }
3669        for (unsigned i = 0; i < eCount / 2; i++) {
3670            destReg.elements[eCount / 2 + i] = mid[2 * i];
3671        }
3672    '''
3673    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3674    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3675
3676    vzipCode = '''
3677        Element mid[eCount];
3678        memcpy(&mid, &destReg, sizeof(destReg));
3679        for (unsigned i = 0; i < eCount / 2; i++) {
3680            destReg.elements[2 * i] = mid[i];
3681            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3682        }
3683        for (int i = 0; i < eCount / 2; i++) {
3684            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3685            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3686        }
3687    '''
3688    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3689    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3690
3691    vmovnCode = 'destElem = srcElem1;'
3692    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3693
3694    vdupCode = 'destElem = srcElem1;'
3695    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3696    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3697
3698    def vdupGprInst(name, Name, opClass, types, rCount):
3699        global header_output, exec_output
3700        eWalkCode = simdEnabledCheckCode + '''
3701        RegVect destReg;
3702        for (unsigned i = 0; i < eCount; i++) {
3703            destReg.elements[i] = htog((Element)Op1);
3704        }
3705        '''
3706        for reg in range(rCount):
3707            eWalkCode += '''
3708            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3709            ''' % { "reg" : reg }
3710        iop = InstObjParams(name, Name,
3711                            "RegRegOp",
3712                            { "code": eWalkCode,
3713                              "r_count": rCount,
3714                              "predicate_test": predicateTest,
3715                              "op_class": opClass }, [])
3716        header_output += NeonRegRegOpDeclare.subst(iop)
3717        exec_output += NeonEqualRegExecute.subst(iop)
3718        for type in types:
3719            substDict = { "targs" : type,
3720                          "class_name" : Name }
3721            exec_output += NeonExecDeclare.subst(substDict)
3722    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3723    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3724
3725    vmovCode = 'destElem = imm;'
3726    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3727    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3728
3729    vorrCode = 'destElem |= imm;'
3730    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3731    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3732
3733    vmvnCode = 'destElem = ~imm;'
3734    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3735    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3736
3737    vbicCode = 'destElem &= ~imm;'
3738    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3739    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3740
3741    vqmovnCode = '''
3742    FPSCR fpscr = (FPSCR) FpscrQc;
3743    destElem = srcElem1;
3744    if ((BigElement)destElem != srcElem1) {
3745        fpscr.qc = 1;
3746        destElem = mask(sizeof(Element) * 8 - 1);
3747        if (srcElem1 < 0)
3748            destElem = ~destElem;
3749    }
3750    FpscrQc = fpscr;
3751    '''
3752    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3753
3754    vqmovunCode = '''
3755    FPSCR fpscr = (FPSCR) FpscrQc;
3756    destElem = srcElem1;
3757    if ((BigElement)destElem != srcElem1) {
3758        fpscr.qc = 1;
3759        destElem = mask(sizeof(Element) * 8);
3760    }
3761    FpscrQc = fpscr;
3762    '''
3763    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3764            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3765
3766    vqmovunsCode = '''
3767    FPSCR fpscr = (FPSCR) FpscrQc;
3768    destElem = srcElem1;
3769    if (srcElem1 < 0 ||
3770            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3771        fpscr.qc = 1;
3772        destElem = mask(sizeof(Element) * 8);
3773        if (srcElem1 < 0)
3774            destElem = ~destElem;
3775    }
3776    FpscrQc = fpscr;
3777    '''
3778    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3779            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3780
3781    def buildVext(name, Name, opClass, types, rCount, op):
3782        global header_output, exec_output
3783        eWalkCode = simdEnabledCheckCode + '''
3784        RegVect srcReg1, srcReg2, destReg;
3785        '''
3786        for reg in range(rCount):
3787            eWalkCode += '''
3788                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3789                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3790            ''' % { "reg" : reg }
3791        eWalkCode += op
3792        for reg in range(rCount):
3793            eWalkCode += '''
3794            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3795            ''' % { "reg" : reg }
3796        iop = InstObjParams(name, Name,
3797                            "RegRegRegImmOp",
3798                            { "code": eWalkCode,
3799                              "r_count": rCount,
3800                              "predicate_test": predicateTest,
3801                              "op_class": opClass }, [])
3802        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3803        exec_output += NeonEqualRegExecute.subst(iop)
3804        for type in types:
3805            substDict = { "targs" : type,
3806                          "class_name" : Name }
3807            exec_output += NeonExecDeclare.subst(substDict)
3808
3809    vextCode = '''
3810        for (unsigned i = 0; i < eCount; i++) {
3811            unsigned index = i + imm;
3812            if (index < eCount) {
3813                destReg.elements[i] = srcReg1.elements[index];
3814            } else {
3815                index -= eCount;
3816                if (index >= eCount) {
3817                    fault = std::make_shared<UndefinedInstruction>(machInst,
3818                                                                   false,
3819                                                                   mnemonic);
3820                } else {
3821                    destReg.elements[i] = srcReg2.elements[index];
3822                }
3823            }
3824        }
3825    '''
3826    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3827    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3828
3829    def buildVtbxl(name, Name, opClass, length, isVtbl):
3830        global header_output, decoder_output, exec_output
3831        code = simdEnabledCheckCode + '''
3832            union
3833            {
3834                uint8_t bytes[32];
3835                FloatRegBits regs[8];
3836            } table;
3837
3838            union
3839            {
3840                uint8_t bytes[8];
3841                FloatRegBits regs[2];
3842            } destReg, srcReg2;
3843
3844            const unsigned length = %(length)d;
3845            const bool isVtbl = %(isVtbl)s;
3846
3847            srcReg2.regs[0] = htog(FpOp2P0_uw);
3848            srcReg2.regs[1] = htog(FpOp2P1_uw);
3849
3850            destReg.regs[0] = htog(FpDestP0_uw);
3851            destReg.regs[1] = htog(FpDestP1_uw);
3852        ''' % { "length" : length, "isVtbl" : isVtbl }
3853        for reg in range(8):
3854            if reg < length * 2:
3855                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3856                        { "reg" : reg }
3857            else:
3858                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3859        code += '''
3860        for (unsigned i = 0; i < sizeof(destReg); i++) {
3861            uint8_t index = srcReg2.bytes[i];
3862            if (index < 8 * length) {
3863                destReg.bytes[i] = table.bytes[index];
3864            } else {
3865                if (isVtbl)
3866                    destReg.bytes[i] = 0;
3867                // else destReg.bytes[i] unchanged
3868            }
3869        }
3870
3871        FpDestP0_uw = gtoh(destReg.regs[0]);
3872        FpDestP1_uw = gtoh(destReg.regs[1]);
3873        '''
3874        iop = InstObjParams(name, Name,
3875                            "RegRegRegOp",
3876                            { "code": code,
3877                              "predicate_test": predicateTest,
3878                              "op_class": opClass }, [])
3879        header_output += RegRegRegOpDeclare.subst(iop)
3880        decoder_output += RegRegRegOpConstructor.subst(iop)
3881        exec_output += PredOpExecute.subst(iop)
3882
3883    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3884    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3885    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3886    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3887
3888    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3889    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3890    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3891    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3892}};
3893