neon.isa revision 13978
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061let {{
1062    header_output = ""
1063    exec_output = ""
1064
1065    vcompares = '''
1066    static float
1067    vcgtFunc(float op1, float op2)
1068    {
1069        if (std::isnan(op1) || std::isnan(op2))
1070            return 2.0;
1071        return (op1 > op2) ? 0.0 : 1.0;
1072    }
1073
1074    static float
1075    vcgeFunc(float op1, float op2)
1076    {
1077        if (std::isnan(op1) || std::isnan(op2))
1078            return 2.0;
1079        return (op1 >= op2) ? 0.0 : 1.0;
1080    }
1081
1082    static float
1083    vceqFunc(float op1, float op2)
1084    {
1085        if (isSnan(op1) || isSnan(op2))
1086            return 2.0;
1087        return (op1 == op2) ? 0.0 : 1.0;
1088    }
1089'''
1090    vcomparesL = '''
1091    static float
1092    vcleFunc(float op1, float op2)
1093    {
1094        if (std::isnan(op1) || std::isnan(op2))
1095            return 2.0;
1096        return (op1 <= op2) ? 0.0 : 1.0;
1097    }
1098
1099    static float
1100    vcltFunc(float op1, float op2)
1101    {
1102        if (std::isnan(op1) || std::isnan(op2))
1103            return 2.0;
1104        return (op1 < op2) ? 0.0 : 1.0;
1105    }
1106'''
1107    vacomparesG = '''
1108    static float
1109    vacgtFunc(float op1, float op2)
1110    {
1111        if (std::isnan(op1) || std::isnan(op2))
1112            return 2.0;
1113        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1114    }
1115
1116    static float
1117    vacgeFunc(float op1, float op2)
1118    {
1119        if (std::isnan(op1) || std::isnan(op2))
1120            return 2.0;
1121        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1122    }
1123'''
1124
1125    exec_output += vcompares + vacomparesG
1126
1127    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1128    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1129    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1130    signedTypes = smallSignedTypes + ("int64_t",)
1131    smallTypes = smallUnsignedTypes + smallSignedTypes
1132    allTypes = unsignedTypes + signedTypes
1133
1134    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135                          readDest=False, pairwise=False,
1136                          standardFpcsr=False):
1137        global header_output, exec_output
1138        eWalkCode = simdEnabledCheckCode + '''
1139        RegVect srcReg1, srcReg2, destReg;
1140        '''
1141        for reg in range(rCount):
1142            eWalkCode += '''
1143                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1144                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1145            ''' % { "reg" : reg }
1146            if readDest:
1147                eWalkCode += '''
1148                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1149                ''' % { "reg" : reg }
1150        readDestCode = ''
1151        if standardFpcsr:
1152            eWalkCode += '''
1153            FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
1154            '''
1155        if readDest:
1156            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1157        if pairwise:
1158            eWalkCode += '''
1159            for (unsigned i = 0; i < eCount; i++) {
1160                Element srcElem1 = gtoh(2 * i < eCount ?
1161                                        srcReg1.elements[2 * i] :
1162                                        srcReg2.elements[2 * i - eCount]);
1163                Element srcElem2 = gtoh(2 * i < eCount ?
1164                                        srcReg1.elements[2 * i + 1] :
1165                                        srcReg2.elements[2 * i + 1 - eCount]);
1166                Element destElem;
1167                %(readDest)s
1168                %(op)s
1169                destReg.elements[i] = htog(destElem);
1170            }
1171            ''' % { "op" : op, "readDest" : readDestCode }
1172        else:
1173            eWalkCode += '''
1174            for (unsigned i = 0; i < eCount; i++) {
1175                Element srcElem1 = gtoh(srcReg1.elements[i]);
1176                Element srcElem2 = gtoh(srcReg2.elements[i]);
1177                Element destElem;
1178                %(readDest)s
1179                %(op)s
1180                destReg.elements[i] = htog(destElem);
1181            }
1182            ''' % { "op" : op, "readDest" : readDestCode }
1183        if standardFpcsr:
1184            eWalkCode += '''
1185            FpscrExc = fpscr;
1186            '''
1187        for reg in range(rCount):
1188            eWalkCode += '''
1189            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1190            ''' % { "reg" : reg }
1191        iop = InstObjParams(name, Name,
1192                            "RegRegRegOp",
1193                            { "code": eWalkCode,
1194                              "r_count": rCount,
1195                              "predicate_test": predicateTest,
1196                              "op_class": opClass }, [])
1197        header_output += NeonRegRegRegOpDeclare.subst(iop)
1198        exec_output += NeonEqualRegExecute.subst(iop)
1199        for type in types:
1200            substDict = { "targs" : type,
1201                          "class_name" : Name }
1202            exec_output += NeonExecDeclare.subst(substDict)
1203
1204    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1205                            readDest=False, pairwise=False, toInt=False):
1206        global header_output, exec_output
1207        eWalkCode = simdEnabledCheckCode + '''
1208        typedef float FloatVect[rCount];
1209        FloatVect srcRegs1, srcRegs2;
1210        '''
1211        if toInt:
1212            eWalkCode += 'RegVect destRegs;\n'
1213        else:
1214            eWalkCode += 'FloatVect destRegs;\n'
1215        for reg in range(rCount):
1216            eWalkCode += '''
1217                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1218                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1219            ''' % { "reg" : reg }
1220            if readDest:
1221                if toInt:
1222                    eWalkCode += '''
1223                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1224                    ''' % { "reg" : reg }
1225                else:
1226                    eWalkCode += '''
1227                        destRegs[%(reg)d] = FpDestP%(reg)d;
1228                    ''' % { "reg" : reg }
1229        readDestCode = ''
1230        if readDest:
1231            readDestCode = 'destReg = destRegs[r];'
1232        destType = 'float'
1233        writeDest = 'destRegs[r] = destReg;'
1234        if toInt:
1235            destType = 'uint32_t'
1236            writeDest = 'destRegs.regs[r] = destReg;'
1237        if pairwise:
1238            eWalkCode += '''
1239            for (unsigned r = 0; r < rCount; r++) {
1240                float srcReg1 = (2 * r < rCount) ?
1241                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1242                float srcReg2 = (2 * r < rCount) ?
1243                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1244                %(destType)s destReg;
1245                %(readDest)s
1246                %(op)s
1247                %(writeDest)s
1248            }
1249            ''' % { "op" : op,
1250                    "readDest" : readDestCode,
1251                    "destType" : destType,
1252                    "writeDest" : writeDest }
1253        else:
1254            eWalkCode += '''
1255            for (unsigned r = 0; r < rCount; r++) {
1256                float srcReg1 = srcRegs1[r];
1257                float srcReg2 = srcRegs2[r];
1258                %(destType)s destReg;
1259                %(readDest)s
1260                %(op)s
1261                %(writeDest)s
1262            }
1263            ''' % { "op" : op,
1264                    "readDest" : readDestCode,
1265                    "destType" : destType,
1266                    "writeDest" : writeDest }
1267        for reg in range(rCount):
1268            if toInt:
1269                eWalkCode += '''
1270                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1271                ''' % { "reg" : reg }
1272            else:
1273                eWalkCode += '''
1274                FpDestP%(reg)d = destRegs[%(reg)d];
1275                ''' % { "reg" : reg }
1276        iop = InstObjParams(name, Name,
1277                            "FpRegRegRegOp",
1278                            { "code": eWalkCode,
1279                              "r_count": rCount,
1280                              "predicate_test": predicateTest,
1281                              "op_class": opClass }, [])
1282        header_output += NeonRegRegRegOpDeclare.subst(iop)
1283        exec_output += NeonEqualRegExecute.subst(iop)
1284        for type in types:
1285            substDict = { "targs" : type,
1286                          "class_name" : Name }
1287            exec_output += NeonExecDeclare.subst(substDict)
1288
1289    def threeUnequalRegInst(name, Name, opClass, types, op,
1290                            bigSrc1, bigSrc2, bigDest, readDest):
1291        global header_output, exec_output
1292        src1Cnt = src2Cnt = destCnt = 2
1293        src1Prefix = src2Prefix = destPrefix = ''
1294        if bigSrc1:
1295            src1Cnt = 4
1296            src1Prefix = 'Big'
1297        if bigSrc2:
1298            src2Cnt = 4
1299            src2Prefix = 'Big'
1300        if bigDest:
1301            destCnt = 4
1302            destPrefix = 'Big'
1303        eWalkCode = simdEnabledCheckCode + '''
1304            %sRegVect srcReg1;
1305            %sRegVect srcReg2;
1306            %sRegVect destReg;
1307        ''' % (src1Prefix, src2Prefix, destPrefix)
1308        for reg in range(src1Cnt):
1309            eWalkCode += '''
1310                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1311            ''' % { "reg" : reg }
1312        for reg in range(src2Cnt):
1313            eWalkCode += '''
1314                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1315            ''' % { "reg" : reg }
1316        if readDest:
1317            for reg in range(destCnt):
1318                eWalkCode += '''
1319                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1320                ''' % { "reg" : reg }
1321        readDestCode = ''
1322        if readDest:
1323            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1324        eWalkCode += '''
1325        for (unsigned i = 0; i < eCount; i++) {
1326            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1327            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1328            %(destPrefix)sElement destElem;
1329            %(readDest)s
1330            %(op)s
1331            destReg.elements[i] = htog(destElem);
1332        }
1333        ''' % { "op" : op, "readDest" : readDestCode,
1334                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1335                "destPrefix" : destPrefix }
1336        for reg in range(destCnt):
1337            eWalkCode += '''
1338            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1339            ''' % { "reg" : reg }
1340        iop = InstObjParams(name, Name,
1341                            "RegRegRegOp",
1342                            { "code": eWalkCode,
1343                              "r_count": 2,
1344                              "predicate_test": predicateTest,
1345                              "op_class": opClass }, [])
1346        header_output += NeonRegRegRegOpDeclare.subst(iop)
1347        exec_output += NeonUnequalRegExecute.subst(iop)
1348        for type in types:
1349            substDict = { "targs" : type,
1350                          "class_name" : Name }
1351            exec_output += NeonExecDeclare.subst(substDict)
1352
1353    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1354        threeUnequalRegInst(name, Name, opClass, types, op,
1355                            True, True, False, readDest)
1356
1357    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1358        threeUnequalRegInst(name, Name, opClass, types, op,
1359                            False, False, True, readDest)
1360
1361    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1362        threeUnequalRegInst(name, Name, opClass, types, op,
1363                            True, False, True, readDest)
1364
1365    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1366        global header_output, exec_output
1367        eWalkCode = simdEnabledCheckCode + '''
1368        RegVect srcReg1, srcReg2, destReg;
1369        '''
1370        for reg in range(rCount):
1371            eWalkCode += '''
1372                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1373                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1374            ''' % { "reg" : reg }
1375            if readDest:
1376                eWalkCode += '''
1377                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1378                ''' % { "reg" : reg }
1379        readDestCode = ''
1380        if readDest:
1381            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1382        eWalkCode += '''
1383        if (imm < 0 && imm >= eCount) {
1384            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1385                                                           mnemonic);
1386        } else {
1387            for (unsigned i = 0; i < eCount; i++) {
1388                Element srcElem1 = gtoh(srcReg1.elements[i]);
1389                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1390                Element destElem;
1391                %(readDest)s
1392                %(op)s
1393                destReg.elements[i] = htog(destElem);
1394            }
1395        }
1396        ''' % { "op" : op, "readDest" : readDestCode }
1397        for reg in range(rCount):
1398            eWalkCode += '''
1399            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1400            ''' % { "reg" : reg }
1401        iop = InstObjParams(name, Name,
1402                            "RegRegRegImmOp",
1403                            { "code": eWalkCode,
1404                              "r_count": rCount,
1405                              "predicate_test": predicateTest,
1406                              "op_class": opClass }, [])
1407        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1408        exec_output += NeonEqualRegExecute.subst(iop)
1409        for type in types:
1410            substDict = { "targs" : type,
1411                          "class_name" : Name }
1412            exec_output += NeonExecDeclare.subst(substDict)
1413
1414    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1415        global header_output, exec_output
1416        rCount = 2
1417        eWalkCode = simdEnabledCheckCode + '''
1418        RegVect srcReg1, srcReg2;
1419        BigRegVect destReg;
1420        '''
1421        for reg in range(rCount):
1422            eWalkCode += '''
1423                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1424                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1425            ''' % { "reg" : reg }
1426        if readDest:
1427            for reg in range(2 * rCount):
1428                eWalkCode += '''
1429                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1430                ''' % { "reg" : reg }
1431        readDestCode = ''
1432        if readDest:
1433            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1434        eWalkCode += '''
1435        if (imm < 0 && imm >= eCount) {
1436            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1437                                                          mnemonic);
1438        } else {
1439            for (unsigned i = 0; i < eCount; i++) {
1440                Element srcElem1 = gtoh(srcReg1.elements[i]);
1441                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1442                BigElement destElem;
1443                %(readDest)s
1444                %(op)s
1445                destReg.elements[i] = htog(destElem);
1446            }
1447        }
1448        ''' % { "op" : op, "readDest" : readDestCode }
1449        for reg in range(2 * rCount):
1450            eWalkCode += '''
1451            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1452            ''' % { "reg" : reg }
1453        iop = InstObjParams(name, Name,
1454                            "RegRegRegImmOp",
1455                            { "code": eWalkCode,
1456                              "r_count": rCount,
1457                              "predicate_test": predicateTest,
1458                              "op_class": opClass }, [])
1459        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1460        exec_output += NeonUnequalRegExecute.subst(iop)
1461        for type in types:
1462            substDict = { "targs" : type,
1463                          "class_name" : Name }
1464            exec_output += NeonExecDeclare.subst(substDict)
1465
1466    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1467        global header_output, exec_output
1468        eWalkCode = simdEnabledCheckCode + '''
1469        typedef float FloatVect[rCount];
1470        FloatVect srcRegs1, srcRegs2, destRegs;
1471        '''
1472        for reg in range(rCount):
1473            eWalkCode += '''
1474                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1475                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1476            ''' % { "reg" : reg }
1477            if readDest:
1478                eWalkCode += '''
1479                    destRegs[%(reg)d] = FpDestP%(reg)d;
1480                ''' % { "reg" : reg }
1481        readDestCode = ''
1482        if readDest:
1483            readDestCode = 'destReg = destRegs[i];'
1484        eWalkCode += '''
1485        if (imm < 0 && imm >= eCount) {
1486            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1487                                                           mnemonic);
1488        } else {
1489            for (unsigned i = 0; i < rCount; i++) {
1490                float srcReg1 = srcRegs1[i];
1491                float srcReg2 = srcRegs2[imm];
1492                float destReg;
1493                %(readDest)s
1494                %(op)s
1495                destRegs[i] = destReg;
1496            }
1497        }
1498        ''' % { "op" : op, "readDest" : readDestCode }
1499        for reg in range(rCount):
1500            eWalkCode += '''
1501            FpDestP%(reg)d = destRegs[%(reg)d];
1502            ''' % { "reg" : reg }
1503        iop = InstObjParams(name, Name,
1504                            "FpRegRegRegImmOp",
1505                            { "code": eWalkCode,
1506                              "r_count": rCount,
1507                              "predicate_test": predicateTest,
1508                              "op_class": opClass }, [])
1509        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1510        exec_output += NeonEqualRegExecute.subst(iop)
1511        for type in types:
1512            substDict = { "targs" : type,
1513                          "class_name" : Name }
1514            exec_output += NeonExecDeclare.subst(substDict)
1515
1516    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1517            readDest=False, toInt=False, fromInt=False):
1518        global header_output, exec_output
1519        eWalkCode = simdEnabledCheckCode + '''
1520        RegVect srcRegs1, destRegs;
1521        '''
1522        for reg in range(rCount):
1523            eWalkCode += '''
1524                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1525            ''' % { "reg" : reg }
1526            if readDest:
1527                eWalkCode += '''
1528                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1529                ''' % { "reg" : reg }
1530        readDestCode = ''
1531        if readDest:
1532            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1533            if toInt:
1534                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1535        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1536        if fromInt:
1537            readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);'
1538        declDest = 'Element destElem;'
1539        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1540        if toInt:
1541            declDest = 'uint32_t destReg;'
1542            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1543        eWalkCode += '''
1544        for (unsigned i = 0; i < eCount; i++) {
1545            %(readOp)s
1546            %(declDest)s
1547            %(readDest)s
1548            %(op)s
1549            %(writeDest)s
1550        }
1551        ''' % { "readOp" : readOpCode,
1552                "declDest" : declDest,
1553                "readDest" : readDestCode,
1554                "op" : op,
1555                "writeDest" : writeDestCode }
1556        for reg in range(rCount):
1557            eWalkCode += '''
1558            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1559            ''' % { "reg" : reg }
1560        iop = InstObjParams(name, Name,
1561                            "RegRegImmOp",
1562                            { "code": eWalkCode,
1563                              "r_count": rCount,
1564                              "predicate_test": predicateTest,
1565                              "op_class": opClass }, [])
1566        header_output += NeonRegRegImmOpDeclare.subst(iop)
1567        exec_output += NeonEqualRegExecute.subst(iop)
1568        for type in types:
1569            substDict = { "targs" : type,
1570                          "class_name" : Name }
1571            exec_output += NeonExecDeclare.subst(substDict)
1572
1573    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1574        global header_output, exec_output
1575        eWalkCode = simdEnabledCheckCode + '''
1576        BigRegVect srcReg1;
1577        RegVect destReg;
1578        '''
1579        for reg in range(4):
1580            eWalkCode += '''
1581                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1582            ''' % { "reg" : reg }
1583        if readDest:
1584            for reg in range(2):
1585                eWalkCode += '''
1586                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1587                ''' % { "reg" : reg }
1588        readDestCode = ''
1589        if readDest:
1590            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1591        eWalkCode += '''
1592        for (unsigned i = 0; i < eCount; i++) {
1593            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1594            Element destElem;
1595            %(readDest)s
1596            %(op)s
1597            destReg.elements[i] = htog(destElem);
1598        }
1599        ''' % { "op" : op, "readDest" : readDestCode }
1600        for reg in range(2):
1601            eWalkCode += '''
1602            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1603            ''' % { "reg" : reg }
1604        iop = InstObjParams(name, Name,
1605                            "RegRegImmOp",
1606                            { "code": eWalkCode,
1607                              "r_count": 2,
1608                              "predicate_test": predicateTest,
1609                              "op_class": opClass }, [])
1610        header_output += NeonRegRegImmOpDeclare.subst(iop)
1611        exec_output += NeonUnequalRegExecute.subst(iop)
1612        for type in types:
1613            substDict = { "targs" : type,
1614                          "class_name" : Name }
1615            exec_output += NeonExecDeclare.subst(substDict)
1616
1617    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1618        global header_output, exec_output
1619        eWalkCode = simdEnabledCheckCode + '''
1620        RegVect srcReg1;
1621        BigRegVect destReg;
1622        '''
1623        for reg in range(2):
1624            eWalkCode += '''
1625                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1626            ''' % { "reg" : reg }
1627        if readDest:
1628            for reg in range(4):
1629                eWalkCode += '''
1630                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1631                ''' % { "reg" : reg }
1632        readDestCode = ''
1633        if readDest:
1634            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1635        eWalkCode += '''
1636        for (unsigned i = 0; i < eCount; i++) {
1637            Element srcElem1 = gtoh(srcReg1.elements[i]);
1638            BigElement destElem;
1639            %(readDest)s
1640            %(op)s
1641            destReg.elements[i] = htog(destElem);
1642        }
1643        ''' % { "op" : op, "readDest" : readDestCode }
1644        for reg in range(4):
1645            eWalkCode += '''
1646            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1647            ''' % { "reg" : reg }
1648        iop = InstObjParams(name, Name,
1649                            "RegRegImmOp",
1650                            { "code": eWalkCode,
1651                              "r_count": 2,
1652                              "predicate_test": predicateTest,
1653                              "op_class": opClass }, [])
1654        header_output += NeonRegRegImmOpDeclare.subst(iop)
1655        exec_output += NeonUnequalRegExecute.subst(iop)
1656        for type in types:
1657            substDict = { "targs" : type,
1658                          "class_name" : Name }
1659            exec_output += NeonExecDeclare.subst(substDict)
1660
1661    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1662        global header_output, exec_output
1663        eWalkCode = simdEnabledCheckCode + '''
1664        RegVect srcReg1, destReg;
1665        '''
1666        for reg in range(rCount):
1667            eWalkCode += '''
1668                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1669            ''' % { "reg" : reg }
1670            if readDest:
1671                eWalkCode += '''
1672                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1673                ''' % { "reg" : reg }
1674        readDestCode = ''
1675        if readDest:
1676            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1677        eWalkCode += '''
1678        for (unsigned i = 0; i < eCount; i++) {
1679            unsigned j = i;
1680            Element srcElem1 = gtoh(srcReg1.elements[i]);
1681            Element destElem;
1682            %(readDest)s
1683            %(op)s
1684            destReg.elements[j] = htog(destElem);
1685        }
1686        ''' % { "op" : op, "readDest" : readDestCode }
1687        for reg in range(rCount):
1688            eWalkCode += '''
1689            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1690            ''' % { "reg" : reg }
1691        iop = InstObjParams(name, Name,
1692                            "RegRegOp",
1693                            { "code": eWalkCode,
1694                              "r_count": rCount,
1695                              "predicate_test": predicateTest,
1696                              "op_class": opClass }, [])
1697        header_output += NeonRegRegOpDeclare.subst(iop)
1698        exec_output += NeonEqualRegExecute.subst(iop)
1699        for type in types:
1700            substDict = { "targs" : type,
1701                          "class_name" : Name }
1702            exec_output += NeonExecDeclare.subst(substDict)
1703
1704    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1705        global header_output, exec_output
1706        eWalkCode = simdEnabledCheckCode + '''
1707        RegVect srcReg1, destReg;
1708        '''
1709        for reg in range(rCount):
1710            eWalkCode += '''
1711                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1712            ''' % { "reg" : reg }
1713            if readDest:
1714                eWalkCode += '''
1715                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1716                ''' % { "reg" : reg }
1717        readDestCode = ''
1718        if readDest:
1719            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1720        eWalkCode += '''
1721        for (unsigned i = 0; i < eCount; i++) {
1722            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1723            Element destElem;
1724            %(readDest)s
1725            %(op)s
1726            destReg.elements[i] = htog(destElem);
1727        }
1728        ''' % { "op" : op, "readDest" : readDestCode }
1729        for reg in range(rCount):
1730            eWalkCode += '''
1731            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1732            ''' % { "reg" : reg }
1733        iop = InstObjParams(name, Name,
1734                            "RegRegImmOp",
1735                            { "code": eWalkCode,
1736                              "r_count": rCount,
1737                              "predicate_test": predicateTest,
1738                              "op_class": opClass }, [])
1739        header_output += NeonRegRegImmOpDeclare.subst(iop)
1740        exec_output += NeonEqualRegExecute.subst(iop)
1741        for type in types:
1742            substDict = { "targs" : type,
1743                          "class_name" : Name }
1744            exec_output += NeonExecDeclare.subst(substDict)
1745
1746    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1747        global header_output, exec_output
1748        eWalkCode = simdEnabledCheckCode + '''
1749        RegVect srcReg1, destReg;
1750        '''
1751        for reg in range(rCount):
1752            eWalkCode += '''
1753                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1754                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1755            ''' % { "reg" : reg }
1756            if readDest:
1757                eWalkCode += '''
1758                ''' % { "reg" : reg }
1759        readDestCode = ''
1760        if readDest:
1761            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1762        eWalkCode += op
1763        for reg in range(rCount):
1764            eWalkCode += '''
1765            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1766            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1767            ''' % { "reg" : reg }
1768        iop = InstObjParams(name, Name,
1769                            "RegRegOp",
1770                            { "code": eWalkCode,
1771                              "r_count": rCount,
1772                              "predicate_test": predicateTest,
1773                              "op_class": opClass }, [])
1774        header_output += NeonRegRegOpDeclare.subst(iop)
1775        exec_output += NeonEqualRegExecute.subst(iop)
1776        for type in types:
1777            substDict = { "targs" : type,
1778                          "class_name" : Name }
1779            exec_output += NeonExecDeclare.subst(substDict)
1780
1781    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1782            readDest=False, toInt=False):
1783        global header_output, exec_output
1784        eWalkCode = simdEnabledCheckCode + '''
1785        typedef float FloatVect[rCount];
1786        FloatVect srcRegs1;
1787        '''
1788        if toInt:
1789            eWalkCode += 'RegVect destRegs;\n'
1790        else:
1791            eWalkCode += 'FloatVect destRegs;\n'
1792        for reg in range(rCount):
1793            eWalkCode += '''
1794                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1795            ''' % { "reg" : reg }
1796            if readDest:
1797                if toInt:
1798                    eWalkCode += '''
1799                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1800                    ''' % { "reg" : reg }
1801                else:
1802                    eWalkCode += '''
1803                        destRegs[%(reg)d] = FpDestP%(reg)d;
1804                    ''' % { "reg" : reg }
1805        readDestCode = ''
1806        if readDest:
1807            readDestCode = 'destReg = destRegs[i];'
1808        destType = 'float'
1809        writeDest = 'destRegs[r] = destReg;'
1810        if toInt:
1811            destType = 'uint32_t'
1812            writeDest = 'destRegs.regs[r] = destReg;'
1813        eWalkCode += '''
1814        for (unsigned r = 0; r < rCount; r++) {
1815            float srcReg1 = srcRegs1[r];
1816            %(destType)s destReg;
1817            %(readDest)s
1818            %(op)s
1819            %(writeDest)s
1820        }
1821        ''' % { "op" : op,
1822                "readDest" : readDestCode,
1823                "destType" : destType,
1824                "writeDest" : writeDest }
1825        for reg in range(rCount):
1826            if toInt:
1827                eWalkCode += '''
1828                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1829                ''' % { "reg" : reg }
1830            else:
1831                eWalkCode += '''
1832                FpDestP%(reg)d = destRegs[%(reg)d];
1833                ''' % { "reg" : reg }
1834        iop = InstObjParams(name, Name,
1835                            "FpRegRegOp",
1836                            { "code": eWalkCode,
1837                              "r_count": rCount,
1838                              "predicate_test": predicateTest,
1839                              "op_class": opClass }, [])
1840        header_output += NeonRegRegOpDeclare.subst(iop)
1841        exec_output += NeonEqualRegExecute.subst(iop)
1842        for type in types:
1843            substDict = { "targs" : type,
1844                          "class_name" : Name }
1845            exec_output += NeonExecDeclare.subst(substDict)
1846
1847    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1848        global header_output, exec_output
1849        eWalkCode = simdEnabledCheckCode + '''
1850        RegVect srcRegs;
1851        BigRegVect destReg;
1852        '''
1853        for reg in range(rCount):
1854            eWalkCode += '''
1855                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1856            ''' % { "reg" : reg }
1857            if readDest:
1858                eWalkCode += '''
1859                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1860                ''' % { "reg" : reg }
1861        readDestCode = ''
1862        if readDest:
1863            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1864        eWalkCode += '''
1865        for (unsigned i = 0; i < eCount / 2; i++) {
1866            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1867            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1868            BigElement destElem;
1869            %(readDest)s
1870            %(op)s
1871            destReg.elements[i] = htog(destElem);
1872        }
1873        ''' % { "op" : op, "readDest" : readDestCode }
1874        for reg in range(rCount):
1875            eWalkCode += '''
1876            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1877            ''' % { "reg" : reg }
1878        iop = InstObjParams(name, Name,
1879                            "RegRegOp",
1880                            { "code": eWalkCode,
1881                              "r_count": rCount,
1882                              "predicate_test": predicateTest,
1883                              "op_class": opClass }, [])
1884        header_output += NeonRegRegOpDeclare.subst(iop)
1885        exec_output += NeonUnequalRegExecute.subst(iop)
1886        for type in types:
1887            substDict = { "targs" : type,
1888                          "class_name" : Name }
1889            exec_output += NeonExecDeclare.subst(substDict)
1890
1891    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1892        global header_output, exec_output
1893        eWalkCode = simdEnabledCheckCode + '''
1894        BigRegVect srcReg1;
1895        RegVect destReg;
1896        '''
1897        for reg in range(4):
1898            eWalkCode += '''
1899                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1900            ''' % { "reg" : reg }
1901        if readDest:
1902            for reg in range(2):
1903                eWalkCode += '''
1904                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1905                ''' % { "reg" : reg }
1906        readDestCode = ''
1907        if readDest:
1908            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1909        eWalkCode += '''
1910        for (unsigned i = 0; i < eCount; i++) {
1911            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1912            Element destElem;
1913            %(readDest)s
1914            %(op)s
1915            destReg.elements[i] = htog(destElem);
1916        }
1917        ''' % { "op" : op, "readDest" : readDestCode }
1918        for reg in range(2):
1919            eWalkCode += '''
1920            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1921            ''' % { "reg" : reg }
1922        iop = InstObjParams(name, Name,
1923                            "RegRegOp",
1924                            { "code": eWalkCode,
1925                              "r_count": 2,
1926                              "predicate_test": predicateTest,
1927                              "op_class": opClass }, [])
1928        header_output += NeonRegRegOpDeclare.subst(iop)
1929        exec_output += NeonUnequalRegExecute.subst(iop)
1930        for type in types:
1931            substDict = { "targs" : type,
1932                          "class_name" : Name }
1933            exec_output += NeonExecDeclare.subst(substDict)
1934
1935    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1936        global header_output, exec_output
1937        eWalkCode = simdEnabledCheckCode + '''
1938        RegVect destReg;
1939        '''
1940        if readDest:
1941            for reg in range(rCount):
1942                eWalkCode += '''
1943                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1944                ''' % { "reg" : reg }
1945        readDestCode = ''
1946        if readDest:
1947            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1948        eWalkCode += '''
1949        for (unsigned i = 0; i < eCount; i++) {
1950            Element destElem;
1951            %(readDest)s
1952            %(op)s
1953            destReg.elements[i] = htog(destElem);
1954        }
1955        ''' % { "op" : op, "readDest" : readDestCode }
1956        for reg in range(rCount):
1957            eWalkCode += '''
1958            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1959            ''' % { "reg" : reg }
1960        iop = InstObjParams(name, Name,
1961                            "RegImmOp",
1962                            { "code": eWalkCode,
1963                              "r_count": rCount,
1964                              "predicate_test": predicateTest,
1965                              "op_class": opClass }, [])
1966        header_output += NeonRegImmOpDeclare.subst(iop)
1967        exec_output += NeonEqualRegExecute.subst(iop)
1968        for type in types:
1969            substDict = { "targs" : type,
1970                          "class_name" : Name }
1971            exec_output += NeonExecDeclare.subst(substDict)
1972
1973    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1974        global header_output, exec_output
1975        eWalkCode = simdEnabledCheckCode + '''
1976        RegVect srcReg1;
1977        BigRegVect destReg;
1978        '''
1979        for reg in range(2):
1980            eWalkCode += '''
1981                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1982            ''' % { "reg" : reg }
1983        if readDest:
1984            for reg in range(4):
1985                eWalkCode += '''
1986                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1987                ''' % { "reg" : reg }
1988        readDestCode = ''
1989        if readDest:
1990            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1991        eWalkCode += '''
1992        for (unsigned i = 0; i < eCount; i++) {
1993            Element srcElem1 = gtoh(srcReg1.elements[i]);
1994            BigElement destElem;
1995            %(readDest)s
1996            %(op)s
1997            destReg.elements[i] = htog(destElem);
1998        }
1999        ''' % { "op" : op, "readDest" : readDestCode }
2000        for reg in range(4):
2001            eWalkCode += '''
2002            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
2003            ''' % { "reg" : reg }
2004        iop = InstObjParams(name, Name,
2005                            "RegRegOp",
2006                            { "code": eWalkCode,
2007                              "r_count": 2,
2008                              "predicate_test": predicateTest,
2009                              "op_class": opClass }, [])
2010        header_output += NeonRegRegOpDeclare.subst(iop)
2011        exec_output += NeonUnequalRegExecute.subst(iop)
2012        for type in types:
2013            substDict = { "targs" : type,
2014                          "class_name" : Name }
2015            exec_output += NeonExecDeclare.subst(substDict)
2016
2017    vhaddCode = '''
2018        Element carryBit =
2019            (((unsigned)srcElem1 & 0x1) +
2020             ((unsigned)srcElem2 & 0x1)) >> 1;
2021        // Use division instead of a shift to ensure the sign extension works
2022        // right. The compiler will figure out if it can be a shift. Mask the
2023        // inputs so they get truncated correctly.
2024        destElem = (((srcElem1 & ~(Element)1) / 2) +
2025                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2026    '''
2027    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
2028    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)
2029
2030    vrhaddCode = '''
2031        Element carryBit =
2032            (((unsigned)srcElem1 & 0x1) +
2033             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
2034        // Use division instead of a shift to ensure the sign extension works
2035        // right. The compiler will figure out if it can be a shift. Mask the
2036        // inputs so they get truncated correctly.
2037        destElem = (((srcElem1 & ~(Element)1) / 2) +
2038                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
2039    '''
2040    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
2041    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)
2042
2043    vhsubCode = '''
2044        Element barrowBit =
2045            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
2046        // Use division instead of a shift to ensure the sign extension works
2047        // right. The compiler will figure out if it can be a shift. Mask the
2048        // inputs so they get truncated correctly.
2049        destElem = (((srcElem1 & ~(Element)1) / 2) -
2050                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
2051    '''
2052    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
2053    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2054
2055    vandCode = '''
2056        destElem = srcElem1 & srcElem2;
2057    '''
2058    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
2059    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)
2060
2061    vbicCode = '''
2062        destElem = srcElem1 & ~srcElem2;
2063    '''
2064    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
2065    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)
2066
2067    vorrCode = '''
2068        destElem = srcElem1 | srcElem2;
2069    '''
2070    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
2071    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)
2072
2073    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
2074    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)
2075
2076    vornCode = '''
2077        destElem = srcElem1 | ~srcElem2;
2078    '''
2079    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
2080    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)
2081
2082    veorCode = '''
2083        destElem = srcElem1 ^ srcElem2;
2084    '''
2085    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
2086    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)
2087
2088    vbifCode = '''
2089        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
2090    '''
2091    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
2092    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
2093    vbitCode = '''
2094        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
2095    '''
2096    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
2097    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
2098    vbslCode = '''
2099        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
2100    '''
2101    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
2102    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2103
2104    vmaxCode = '''
2105        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
2106    '''
2107    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
2108    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)
2109
2110    vminCode = '''
2111        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
2112    '''
2113    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
2114    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2115
2116    vaddCode = '''
2117        destElem = srcElem1 + srcElem2;
2118    '''
2119    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
2120    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)
2121
2122    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
2123                      2, vaddCode, pairwise=True)
2124    vaddlwCode = '''
2125        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
2126    '''
2127    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
2128    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
2129    vaddhnCode = '''
2130        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
2131                   (sizeof(Element) * 8);
2132    '''
2133    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
2134    vraddhnCode = '''
2135        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
2136                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2137                   (sizeof(Element) * 8);
2138    '''
2139    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2140
2141    vsubCode = '''
2142        destElem = srcElem1 - srcElem2;
2143    '''
2144    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
2145    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
2146    vsublwCode = '''
2147        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
2148    '''
2149    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
2150    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2151
2152    vqaddUCode = '''
2153        destElem = srcElem1 + srcElem2;
2154        FPSCR fpscr = (FPSCR) FpscrQc;
2155        if (destElem < srcElem1 || destElem < srcElem2) {
2156            destElem = (Element)(-1);
2157            fpscr.qc = 1;
2158        }
2159        FpscrQc = fpscr;
2160    '''
2161    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
2162    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
2163    vsubhnCode = '''
2164        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
2165                   (sizeof(Element) * 8);
2166    '''
2167    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
2168    vrsubhnCode = '''
2169        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
2170                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
2171                   (sizeof(Element) * 8);
2172    '''
2173    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2174
2175    vqaddSCode = '''
2176        destElem = srcElem1 + srcElem2;
2177        FPSCR fpscr = (FPSCR) FpscrQc;
2178        bool negDest = (destElem < 0);
2179        bool negSrc1 = (srcElem1 < 0);
2180        bool negSrc2 = (srcElem2 < 0);
2181        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
2182            if (negDest)
2183                /* If (>=0) plus (>=0) yields (<0), saturate to +. */
2184                destElem = std::numeric_limits<Element>::max();
2185            else
2186                /* If (<0) plus (<0) yields (>=0), saturate to -. */
2187                destElem = std::numeric_limits<Element>::min();
2188            fpscr.qc = 1;
2189        }
2190        FpscrQc = fpscr;
2191    '''
2192    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
2193    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)
2194
2195    vqsubUCode = '''
2196        destElem = srcElem1 - srcElem2;
2197        FPSCR fpscr = (FPSCR) FpscrQc;
2198        if (destElem > srcElem1) {
2199            destElem = 0;
2200            fpscr.qc = 1;
2201        }
2202        FpscrQc = fpscr;
2203    '''
2204    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
2205    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)
2206
2207    vqsubSCode = '''
2208        destElem = srcElem1 - srcElem2;
2209        FPSCR fpscr = (FPSCR) FpscrQc;
2210        bool negDest = (destElem < 0);
2211        bool negSrc1 = (srcElem1 < 0);
2212        bool posSrc2 = (srcElem2 >= 0);
2213        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
2214            if (negDest)
2215                /* If (>=0) minus (<0) yields (<0), saturate to +. */
2216                destElem = std::numeric_limits<Element>::max();
2217            else
2218                /* If (<0) minus (>=0) yields (>=0), saturate to -. */
2219                destElem = std::numeric_limits<Element>::min();
2220            fpscr.qc = 1;
2221        }
2222        FpscrQc = fpscr;
2223    '''
2224    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
2225    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2226
2227    vcgtCode = '''
2228        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
2229    '''
2230    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
2231    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)
2232
2233    vcgeCode = '''
2234        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
2235    '''
2236    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
2237    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)
2238
2239    vceqCode = '''
2240        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
2241    '''
2242    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
2243    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2244
2245    vshlCode = '''
2246        int16_t shiftAmt = (int8_t)srcElem2;
2247        if (shiftAmt < 0) {
2248            shiftAmt = -shiftAmt;
2249            if (shiftAmt >= sizeof(Element) * 8) {
2250                shiftAmt = sizeof(Element) * 8 - 1;
2251                destElem = 0;
2252            } else {
2253                destElem = (srcElem1 >> shiftAmt);
2254            }
2255            // Make sure the right shift sign extended when it should.
2256            if (ltz(srcElem1) && !ltz(destElem)) {
2257                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2258                                             1 - shiftAmt));
2259            }
2260        } else {
2261            if (shiftAmt >= sizeof(Element) * 8) {
2262                destElem = 0;
2263            } else {
2264                destElem = srcElem1 << shiftAmt;
2265            }
2266        }
2267    '''
2268    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
2269    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
2270
2271    vrshlCode = '''
2272        int16_t shiftAmt = (int8_t)srcElem2;
2273        if (shiftAmt < 0) {
2274            shiftAmt = -shiftAmt;
2275            Element rBit = 0;
2276            if (shiftAmt <= sizeof(Element) * 8)
2277                rBit = bits(srcElem1, shiftAmt - 1);
2278            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
2279                rBit = 1;
2280            if (shiftAmt >= sizeof(Element) * 8) {
2281                shiftAmt = sizeof(Element) * 8 - 1;
2282                destElem = 0;
2283            } else {
2284                destElem = (srcElem1 >> shiftAmt);
2285            }
2286            // Make sure the right shift sign extended when it should.
2287            if (ltz(srcElem1) && !ltz(destElem)) {
2288                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2289                                             1 - shiftAmt));
2290            }
2291            destElem += rBit;
2292        } else if (shiftAmt > 0) {
2293            if (shiftAmt >= sizeof(Element) * 8) {
2294                destElem = 0;
2295            } else {
2296                destElem = srcElem1 << shiftAmt;
2297            }
2298        } else {
2299            destElem = srcElem1;
2300        }
2301    '''
2302    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
2303    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2304
2305    vqshlUCode = '''
2306        int16_t shiftAmt = (int8_t)srcElem2;
2307        FPSCR fpscr = (FPSCR) FpscrQc;
2308        if (shiftAmt < 0) {
2309            shiftAmt = -shiftAmt;
2310            if (shiftAmt >= sizeof(Element) * 8) {
2311                shiftAmt = sizeof(Element) * 8 - 1;
2312                destElem = 0;
2313            } else {
2314                destElem = (srcElem1 >> shiftAmt);
2315            }
2316        } else if (shiftAmt > 0) {
2317            if (shiftAmt >= sizeof(Element) * 8) {
2318                if (srcElem1 != 0) {
2319                    destElem = mask(sizeof(Element) * 8);
2320                    fpscr.qc = 1;
2321                } else {
2322                    destElem = 0;
2323                }
2324            } else {
2325                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2326                            sizeof(Element) * 8 - shiftAmt)) {
2327                    destElem = mask(sizeof(Element) * 8);
2328                    fpscr.qc = 1;
2329                } else {
2330                    destElem = srcElem1 << shiftAmt;
2331                }
2332            }
2333        } else {
2334            destElem = srcElem1;
2335        }
2336        FpscrQc = fpscr;
2337    '''
2338    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
2339    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2340
2341    vqshlSCode = '''
2342        int16_t shiftAmt = (int8_t)srcElem2;
2343        FPSCR fpscr = (FPSCR) FpscrQc;
2344        if (shiftAmt < 0) {
2345            shiftAmt = -shiftAmt;
2346            if (shiftAmt >= sizeof(Element) * 8) {
2347                shiftAmt = sizeof(Element) * 8 - 1;
2348                destElem = 0;
2349            } else {
2350                destElem = (srcElem1 >> shiftAmt);
2351            }
2352            // Make sure the right shift sign extended when it should.
2353            if (srcElem1 < 0 && destElem >= 0) {
2354                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2355                                             1 - shiftAmt));
2356            }
2357        } else if (shiftAmt > 0) {
2358            bool sat = false;
2359            if (shiftAmt >= sizeof(Element) * 8) {
2360                if (srcElem1 != 0)
2361                    sat = true;
2362                else
2363                    destElem = 0;
2364            } else {
2365                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2366                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2367                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2368                    sat = true;
2369                } else {
2370                    destElem = srcElem1 << shiftAmt;
2371                }
2372            }
2373            if (sat) {
2374                fpscr.qc = 1;
2375                destElem = mask(sizeof(Element) * 8 - 1);
2376                if (srcElem1 < 0)
2377                    destElem = ~destElem;
2378            }
2379        } else {
2380            destElem = srcElem1;
2381        }
2382        FpscrQc = fpscr;
2383    '''
2384    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
2385    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2386
2387    vqrshlUCode = '''
2388        int16_t shiftAmt = (int8_t)srcElem2;
2389        FPSCR fpscr = (FPSCR) FpscrQc;
2390        if (shiftAmt < 0) {
2391            shiftAmt = -shiftAmt;
2392            Element rBit = 0;
2393            if (shiftAmt <= sizeof(Element) * 8)
2394                rBit = bits(srcElem1, shiftAmt - 1);
2395            if (shiftAmt >= sizeof(Element) * 8) {
2396                shiftAmt = sizeof(Element) * 8 - 1;
2397                destElem = 0;
2398            } else {
2399                destElem = (srcElem1 >> shiftAmt);
2400            }
2401            destElem += rBit;
2402        } else {
2403            if (shiftAmt >= sizeof(Element) * 8) {
2404                if (srcElem1 != 0) {
2405                    destElem = mask(sizeof(Element) * 8);
2406                    fpscr.qc = 1;
2407                } else {
2408                    destElem = 0;
2409                }
2410            } else {
2411                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2412                            sizeof(Element) * 8 - shiftAmt)) {
2413                    destElem = mask(sizeof(Element) * 8);
2414                    fpscr.qc = 1;
2415                } else {
2416                    destElem = srcElem1 << shiftAmt;
2417                }
2418            }
2419        }
2420        FpscrQc = fpscr;
2421    '''
2422    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2423    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2424
2425    vqrshlSCode = '''
2426        int16_t shiftAmt = (int8_t)srcElem2;
2427        FPSCR fpscr = (FPSCR) FpscrQc;
2428        if (shiftAmt < 0) {
2429            shiftAmt = -shiftAmt;
2430            Element rBit = 0;
2431            if (shiftAmt <= sizeof(Element) * 8)
2432                rBit = bits(srcElem1, shiftAmt - 1);
2433            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2434                rBit = 1;
2435            if (shiftAmt >= sizeof(Element) * 8) {
2436                shiftAmt = sizeof(Element) * 8 - 1;
2437                destElem = 0;
2438            } else {
2439                destElem = (srcElem1 >> shiftAmt);
2440            }
2441            // Make sure the right shift sign extended when it should.
2442            if (srcElem1 < 0 && destElem >= 0) {
2443                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2444                                             1 - shiftAmt));
2445            }
2446            destElem += rBit;
2447        } else if (shiftAmt > 0) {
2448            bool sat = false;
2449            if (shiftAmt >= sizeof(Element) * 8) {
2450                if (srcElem1 != 0)
2451                    sat = true;
2452                else
2453                    destElem = 0;
2454            } else {
2455                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2456                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2457                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2458                    sat = true;
2459                } else {
2460                    destElem = srcElem1 << shiftAmt;
2461                }
2462            }
2463            if (sat) {
2464                fpscr.qc = 1;
2465                destElem = mask(sizeof(Element) * 8 - 1);
2466                if (srcElem1 < 0)
2467                    destElem = ~destElem;
2468            }
2469        } else {
2470            destElem = srcElem1;
2471        }
2472        FpscrQc = fpscr;
2473    '''
2474    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2475    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2476
2477    vabaCode = '''
2478        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2479                                            (srcElem2 - srcElem1);
2480    '''
2481    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2482    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
2483    vabalCode = '''
2484        destElem += (srcElem1 > srcElem2) ?
2485            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2486            ((BigElement)srcElem2 - (BigElement)srcElem1);
2487    '''
2488    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2489
2490    vabdCode = '''
2491        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2492                                           (srcElem2 - srcElem1);
2493    '''
2494    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2495    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
2496    vabdlCode = '''
2497        destElem = (srcElem1 > srcElem2) ?
2498            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2499            ((BigElement)srcElem2 - (BigElement)srcElem1);
2500    '''
2501    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2502
2503    vtstCode = '''
2504        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2505    '''
2506    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2507    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2508
2509    vmulCode = '''
2510        destElem = srcElem1 * srcElem2;
2511    '''
2512    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2513    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
2514    vmullCode = '''
2515        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2516    '''
2517    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2518
2519    vmlaCode = '''
2520        destElem = destElem + srcElem1 * srcElem2;
2521    '''
2522    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2523    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
2524    vmlalCode = '''
2525        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2526    '''
2527    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2528
2529    vqdmlalCode = '''
2530        FPSCR fpscr = (FPSCR) FpscrQc;
2531        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2532        Element maxNeg = std::numeric_limits<Element>::min();
2533        Element halfNeg = maxNeg / 2;
2534        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2535            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2536            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2537            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2538            fpscr.qc = 1;
2539        }
2540        bool negPreDest = ltz(destElem);
2541        destElem += midElem;
2542        bool negDest = ltz(destElem);
2543        bool negMid = ltz(midElem);
2544        if (negPreDest == negMid && negMid != negDest) {
2545            destElem = mask(sizeof(BigElement) * 8 - 1);
2546            if (negPreDest)
2547                destElem = ~destElem;
2548            fpscr.qc = 1;
2549        }
2550        FpscrQc = fpscr;
2551    '''
2552    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2553
2554    vqdmlslCode = '''
2555        FPSCR fpscr = (FPSCR) FpscrQc;
2556        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2557        Element maxNeg = std::numeric_limits<Element>::min();
2558        Element halfNeg = maxNeg / 2;
2559        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2560            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2561            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2562            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2563            fpscr.qc = 1;
2564        }
2565        bool negPreDest = ltz(destElem);
2566        destElem -= midElem;
2567        bool negDest = ltz(destElem);
2568        bool posMid = ltz((BigElement)-midElem);
2569        if (negPreDest == posMid && posMid != negDest) {
2570            destElem = mask(sizeof(BigElement) * 8 - 1);
2571            if (negPreDest)
2572                destElem = ~destElem;
2573            fpscr.qc = 1;
2574        }
2575        FpscrQc = fpscr;
2576    '''
2577    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2578
2579    vqdmullCode = '''
2580        FPSCR fpscr = (FPSCR) FpscrQc;
2581        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2582        if (srcElem1 == srcElem2 &&
2583                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2584            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2585            fpscr.qc = 1;
2586        }
2587        FpscrQc = fpscr;
2588    '''
2589    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2590
2591    vmlsCode = '''
2592        destElem = destElem - srcElem1 * srcElem2;
2593    '''
2594    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2595    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2596    vmlslCode = '''
2597        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2598    '''
2599    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2600
2601    vmulpCode = '''
2602        destElem = 0;
2603        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2604            if (bits(srcElem2, j))
2605                destElem ^= srcElem1 << j;
2606        }
2607    '''
2608    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2609    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
2610    vmullpCode = '''
2611        destElem = 0;
2612        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2613            if (bits(srcElem2, j))
2614                destElem ^= (BigElement)srcElem1 << j;
2615        }
2616    '''
2617    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2618
2619    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2620
2621    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2622
2623    vqdmulhCode = '''
2624        FPSCR fpscr = (FPSCR) FpscrQc;
2625        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2626                   (sizeof(Element) * 8);
2627        if (srcElem1 == srcElem2 &&
2628                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
2629            destElem = ~srcElem1;
2630            fpscr.qc = 1;
2631        }
2632        FpscrQc = fpscr;
2633    '''
2634    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2635    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2636
2637    vqrdmulhCode = '''
2638        FPSCR fpscr = (FPSCR) FpscrQc;
2639        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2640                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2641                   (sizeof(Element) * 8);
2642        Element maxNeg = std::numeric_limits<Element>::min();
2643        Element halfNeg = maxNeg / 2;
2644        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2645            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2646            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2647            if (destElem < 0) {
2648                destElem = mask(sizeof(Element) * 8 - 1);
2649            } else {
2650                destElem = std::numeric_limits<Element>::min();
2651            }
2652            fpscr.qc = 1;
2653        }
2654        FpscrQc = fpscr;
2655    '''
2656    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2657            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2658    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2659            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2660
2661    vMinMaxFpCode = '''
2662        destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
2663    '''
2664    vMinMaxInsts = [
2665        ("vmax",   "VmaxDFp",   2, "Max",    False, ),
2666        ("vmax",   "VmaxQFp",   4, "Max",    False, ),
2667        ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ),
2668        ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ),
2669        ("vpmax",  "VpmaxDFp",  2, "Max",    True,  ),
2670        ("vpmax",  "VpmaxQFp",  4, "Max",    True,  ),
2671        ("vmin",   "VminDFp",   2, "Min",    False, ),
2672        ("vmin",   "VminQFp",   4, "Min",    False, ),
2673        ("vminnm", "VminnmDFp", 2, "MinNum", False, ),
2674        ("vminnm", "VminnmQFp", 4, "MinNum", False, ),
2675        ("vpmin",  "VpminDFp",  2, "Min",    True,  ),
2676        ("vpmin",  "VpminQFp",  4, "Min",    True,  ),
2677    ]
2678    for name, Name, rCount, op, pairwise in vMinMaxInsts:
2679        threeEqualRegInst(
2680            name,
2681            Name,
2682            "SimdFloatCmpOp",
2683            ("uint32_t",),
2684            rCount,
2685            vMinMaxFpCode % op,
2686            pairwise=pairwise,
2687            standardFpcsr=True,
2688        )
2689
2690    vaddfpCode = '''
2691        FPSCR fpscr = (FPSCR) FpscrExc;
2692        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2693                           true, true, VfpRoundNearest);
2694        FpscrExc = fpscr;
2695    '''
2696    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2697    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2698
2699    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2700                        2, vaddfpCode, pairwise=True)
2701    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2702                        4, vaddfpCode, pairwise=True)
2703
2704    vsubfpCode = '''
2705        FPSCR fpscr = (FPSCR) FpscrExc;
2706        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2707                           true, true, VfpRoundNearest);
2708        FpscrExc = fpscr;
2709    '''
2710    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2711    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2712
2713    vmulfpCode = '''
2714        FPSCR fpscr = (FPSCR) FpscrExc;
2715        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2716                           true, true, VfpRoundNearest);
2717        FpscrExc = fpscr;
2718    '''
2719    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2720    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2721
2722    vmlafpCode = '''
2723        FPSCR fpscr = (FPSCR) FpscrExc;
2724        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2725                             true, true, VfpRoundNearest);
2726        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2727                           true, true, VfpRoundNearest);
2728        FpscrExc = fpscr;
2729    '''
2730    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2731    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2732
2733    vfmafpCode = '''
2734        FPSCR fpscr = (FPSCR) FpscrExc;
2735        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2736                            true, true, VfpRoundNearest);
2737        FpscrExc = fpscr;
2738    '''
2739    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2740    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2741
2742    vfmsfpCode = '''
2743        FPSCR fpscr = (FPSCR) FpscrExc;
2744        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2745                            true, true, VfpRoundNearest);
2746        FpscrExc = fpscr;
2747    '''
2748    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2749    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2750
2751    vmlsfpCode = '''
2752        FPSCR fpscr = (FPSCR) FpscrExc;
2753        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2754                             true, true, VfpRoundNearest);
2755        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2756                           true, true, VfpRoundNearest);
2757        FpscrExc = fpscr;
2758    '''
2759    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2760    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2761
2762    vcgtfpCode = '''
2763        FPSCR fpscr = (FPSCR) FpscrExc;
2764        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2765                             true, true, VfpRoundNearest);
2766        destReg = (res == 0) ? -1 : 0;
2767        if (res == 2.0)
2768            fpscr.ioc = 1;
2769        FpscrExc = fpscr;
2770    '''
2771    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2772            2, vcgtfpCode, toInt = True)
2773    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2774            4, vcgtfpCode, toInt = True)
2775
2776    vcgefpCode = '''
2777        FPSCR fpscr = (FPSCR) FpscrExc;
2778        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2779                             true, true, VfpRoundNearest);
2780        destReg = (res == 0) ? -1 : 0;
2781        if (res == 2.0)
2782            fpscr.ioc = 1;
2783        FpscrExc = fpscr;
2784    '''
2785    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2786            2, vcgefpCode, toInt = True)
2787    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2788            4, vcgefpCode, toInt = True)
2789
2790    vacgtfpCode = '''
2791        FPSCR fpscr = (FPSCR) FpscrExc;
2792        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2793                             true, true, VfpRoundNearest);
2794        destReg = (res == 0) ? -1 : 0;
2795        if (res == 2.0)
2796            fpscr.ioc = 1;
2797        FpscrExc = fpscr;
2798    '''
2799    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2800            2, vacgtfpCode, toInt = True)
2801    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2802            4, vacgtfpCode, toInt = True)
2803
2804    vacgefpCode = '''
2805        FPSCR fpscr = (FPSCR) FpscrExc;
2806        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2807                             true, true, VfpRoundNearest);
2808        destReg = (res == 0) ? -1 : 0;
2809        if (res == 2.0)
2810            fpscr.ioc = 1;
2811        FpscrExc = fpscr;
2812    '''
2813    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2814            2, vacgefpCode, toInt = True)
2815    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2816            4, vacgefpCode, toInt = True)
2817
2818    vceqfpCode = '''
2819        FPSCR fpscr = (FPSCR) FpscrExc;
2820        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2821                             true, true, VfpRoundNearest);
2822        destReg = (res == 0) ? -1 : 0;
2823        if (res == 2.0)
2824            fpscr.ioc = 1;
2825        FpscrExc = fpscr;
2826    '''
2827    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2828            2, vceqfpCode, toInt = True)
2829    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2830            4, vceqfpCode, toInt = True)
2831
2832    vrecpsCode = '''
2833        FPSCR fpscr = (FPSCR) FpscrExc;
2834        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2835                           true, true, VfpRoundNearest);
2836        FpscrExc = fpscr;
2837    '''
2838    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2839    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2840
2841    vrsqrtsCode = '''
2842        FPSCR fpscr = (FPSCR) FpscrExc;
2843        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2844                           true, true, VfpRoundNearest);
2845        FpscrExc = fpscr;
2846    '''
2847    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2848    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2849
2850    vabdfpCode = '''
2851        FPSCR fpscr = (FPSCR) FpscrExc;
2852        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2853                             true, true, VfpRoundNearest);
2854        destReg = fabs(mid);
2855        FpscrExc = fpscr;
2856    '''
2857    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2858    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2859
2860    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2861    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2862    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2863    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2864    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2865
2866    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2867    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2868    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2869    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2870    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2871
2872    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2873    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2874    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2875    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2876    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2877
2878    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2879    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2880    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2881    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2882    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2883    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2884            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2885    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2886            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2887
2888    vshrCode = '''
2889        if (imm >= sizeof(srcElem1) * 8) {
2890            if (ltz(srcElem1))
2891                destElem = -1;
2892            else
2893                destElem = 0;
2894        } else {
2895            destElem = srcElem1 >> imm;
2896        }
2897    '''
2898    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2899    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2900
2901    vsraCode = '''
2902        Element mid;;
2903        if (imm >= sizeof(srcElem1) * 8) {
2904            mid = ltz(srcElem1) ? -1 : 0;
2905        } else {
2906            mid = srcElem1 >> imm;
2907            if (ltz(srcElem1) && !ltz(mid)) {
2908                mid |= -(mid & ((Element)1 <<
2909                            (sizeof(Element) * 8 - 1 - imm)));
2910            }
2911        }
2912        destElem += mid;
2913    '''
2914    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2915    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2916
2917    vrshrCode = '''
2918        if (imm > sizeof(srcElem1) * 8) {
2919            destElem = 0;
2920        } else if (imm) {
2921            Element rBit = bits(srcElem1, imm - 1);
2922            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2923        } else {
2924            destElem = srcElem1;
2925        }
2926    '''
2927    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2928    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2929
2930    vrsraCode = '''
2931        if (imm > sizeof(srcElem1) * 8) {
2932            destElem += 0;
2933        } else if (imm) {
2934            Element rBit = bits(srcElem1, imm - 1);
2935            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2936        } else {
2937            destElem += srcElem1;
2938        }
2939    '''
2940    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2941    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2942
2943    vsriCode = '''
2944        if (imm >= sizeof(Element) * 8) {
2945            destElem = destElem;
2946        } else {
2947            destElem = (srcElem1 >> imm) |
2948                (destElem & ~mask(sizeof(Element) * 8 - imm));
2949        }
2950    '''
2951    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2952    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2953
2954    vshlCode = '''
2955        if (imm >= sizeof(Element) * 8) {
2956            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2957        } else {
2958            destElem = srcElem1 << imm;
2959        }
2960    '''
2961    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2962    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2963
2964    vsliCode = '''
2965        if (imm >= sizeof(Element) * 8) {
2966            destElem = destElem;
2967        } else {
2968            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2969        }
2970    '''
2971    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2972    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2973
2974    vqshlCode = '''
2975        FPSCR fpscr = (FPSCR) FpscrQc;
2976        if (imm >= sizeof(Element) * 8) {
2977            if (srcElem1 != 0) {
2978                destElem = std::numeric_limits<Element>::min();
2979                if (srcElem1 > 0)
2980                    destElem = ~destElem;
2981                fpscr.qc = 1;
2982            } else {
2983                destElem = 0;
2984            }
2985        } else if (imm) {
2986            destElem = (srcElem1 << imm);
2987            uint64_t topBits = bits((uint64_t)srcElem1,
2988                                    sizeof(Element) * 8 - 1,
2989                                    sizeof(Element) * 8 - 1 - imm);
2990            if (topBits != 0 && topBits != mask(imm + 1)) {
2991                destElem = std::numeric_limits<Element>::min();
2992                if (srcElem1 > 0)
2993                    destElem = ~destElem;
2994                fpscr.qc = 1;
2995            }
2996        } else {
2997            destElem = srcElem1;
2998        }
2999        FpscrQc = fpscr;
3000    '''
3001    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
3002    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3003
3004    vqshluCode = '''
3005        FPSCR fpscr = (FPSCR) FpscrQc;
3006        if (imm >= sizeof(Element) * 8) {
3007            if (srcElem1 != 0) {
3008                destElem = mask(sizeof(Element) * 8);
3009                fpscr.qc = 1;
3010            } else {
3011                destElem = 0;
3012            }
3013        } else if (imm) {
3014            destElem = (srcElem1 << imm);
3015            uint64_t topBits = bits((uint64_t)srcElem1,
3016                                    sizeof(Element) * 8 - 1,
3017                                    sizeof(Element) * 8 - imm);
3018            if (topBits != 0) {
3019                destElem = mask(sizeof(Element) * 8);
3020                fpscr.qc = 1;
3021            }
3022        } else {
3023            destElem = srcElem1;
3024        }
3025        FpscrQc = fpscr;
3026    '''
3027    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3028    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3029
3030    vqshlusCode = '''
3031        FPSCR fpscr = (FPSCR) FpscrQc;
3032        if (imm >= sizeof(Element) * 8) {
3033            if (srcElem1 < 0) {
3034                destElem = 0;
3035                fpscr.qc = 1;
3036            } else if (srcElem1 > 0) {
3037                destElem = mask(sizeof(Element) * 8);
3038                fpscr.qc = 1;
3039            } else {
3040                destElem = 0;
3041            }
3042        } else if (imm) {
3043            destElem = (srcElem1 << imm);
3044            uint64_t topBits = bits((uint64_t)srcElem1,
3045                                    sizeof(Element) * 8 - 1,
3046                                    sizeof(Element) * 8 - imm);
3047            if (srcElem1 < 0) {
3048                destElem = 0;
3049                fpscr.qc = 1;
3050            } else if (topBits != 0) {
3051                destElem = mask(sizeof(Element) * 8);
3052                fpscr.qc = 1;
3053            }
3054        } else {
3055            if (srcElem1 < 0) {
3056                fpscr.qc = 1;
3057                destElem = 0;
3058            } else {
3059                destElem = srcElem1;
3060            }
3061        }
3062        FpscrQc = fpscr;
3063    '''
3064    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3065    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3066
3067    vshrnCode = '''
3068        if (imm >= sizeof(srcElem1) * 8) {
3069            destElem = 0;
3070        } else {
3071            destElem = srcElem1 >> imm;
3072        }
3073    '''
3074    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3075
3076    vrshrnCode = '''
3077        if (imm > sizeof(srcElem1) * 8) {
3078            destElem = 0;
3079        } else if (imm) {
3080            Element rBit = bits(srcElem1, imm - 1);
3081            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3082        } else {
3083            destElem = srcElem1;
3084        }
3085    '''
3086    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3087
3088    vqshrnCode = '''
3089        FPSCR fpscr = (FPSCR) FpscrQc;
3090        if (imm > sizeof(srcElem1) * 8) {
3091            if (srcElem1 != 0 && srcElem1 != -1)
3092                fpscr.qc = 1;
3093            destElem = 0;
3094        } else if (imm) {
3095            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3096            mid |= -(mid & ((BigElement)1 <<
3097                        (sizeof(BigElement) * 8 - 1 - imm)));
3098            if (mid != (Element)mid) {
3099                destElem = mask(sizeof(Element) * 8 - 1);
3100                if (srcElem1 < 0)
3101                    destElem = ~destElem;
3102                fpscr.qc = 1;
3103            } else {
3104                destElem = mid;
3105            }
3106        } else {
3107            destElem = srcElem1;
3108        }
3109        FpscrQc = fpscr;
3110    '''
3111    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3112
3113    vqshrunCode = '''
3114        FPSCR fpscr = (FPSCR) FpscrQc;
3115        if (imm > sizeof(srcElem1) * 8) {
3116            if (srcElem1 != 0)
3117                fpscr.qc = 1;
3118            destElem = 0;
3119        } else if (imm) {
3120            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3121            if (mid != (Element)mid) {
3122                destElem = mask(sizeof(Element) * 8);
3123                fpscr.qc = 1;
3124            } else {
3125                destElem = mid;
3126            }
3127        } else {
3128            destElem = srcElem1;
3129        }
3130        FpscrQc = fpscr;
3131    '''
3132    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3133                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3134
3135    vqshrunsCode = '''
3136        FPSCR fpscr = (FPSCR) FpscrQc;
3137        if (imm > sizeof(srcElem1) * 8) {
3138            if (srcElem1 != 0)
3139                fpscr.qc = 1;
3140            destElem = 0;
3141        } else if (imm) {
3142            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3143            if (bits(mid, sizeof(BigElement) * 8 - 1,
3144                          sizeof(Element) * 8) != 0) {
3145                if (srcElem1 < 0) {
3146                    destElem = 0;
3147                } else {
3148                    destElem = mask(sizeof(Element) * 8);
3149                }
3150                fpscr.qc = 1;
3151            } else {
3152                destElem = mid;
3153            }
3154        } else {
3155            destElem = srcElem1;
3156        }
3157        FpscrQc = fpscr;
3158    '''
3159    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3160                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3161
3162    vqrshrnCode = '''
3163        FPSCR fpscr = (FPSCR) FpscrQc;
3164        if (imm > sizeof(srcElem1) * 8) {
3165            if (srcElem1 != 0 && srcElem1 != -1)
3166                fpscr.qc = 1;
3167            destElem = 0;
3168        } else if (imm) {
3169            BigElement mid = (srcElem1 >> (imm - 1));
3170            uint64_t rBit = mid & 0x1;
3171            mid >>= 1;
3172            mid |= -(mid & ((BigElement)1 <<
3173                        (sizeof(BigElement) * 8 - 1 - imm)));
3174            mid += rBit;
3175            if (mid != (Element)mid) {
3176                destElem = mask(sizeof(Element) * 8 - 1);
3177                if (srcElem1 < 0)
3178                    destElem = ~destElem;
3179                fpscr.qc = 1;
3180            } else {
3181                destElem = mid;
3182            }
3183        } else {
3184            if (srcElem1 != (Element)srcElem1) {
3185                destElem = mask(sizeof(Element) * 8 - 1);
3186                if (srcElem1 < 0)
3187                    destElem = ~destElem;
3188                fpscr.qc = 1;
3189            } else {
3190                destElem = srcElem1;
3191            }
3192        }
3193        FpscrQc = fpscr;
3194    '''
3195    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3196                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3197
3198    vqrshrunCode = '''
3199        FPSCR fpscr = (FPSCR) FpscrQc;
3200        if (imm > sizeof(srcElem1) * 8) {
3201            if (srcElem1 != 0)
3202                fpscr.qc = 1;
3203            destElem = 0;
3204        } else if (imm) {
3205            BigElement mid = (srcElem1 >> (imm - 1));
3206            uint64_t rBit = mid & 0x1;
3207            mid >>= 1;
3208            mid += rBit;
3209            if (mid != (Element)mid) {
3210                destElem = mask(sizeof(Element) * 8);
3211                fpscr.qc = 1;
3212            } else {
3213                destElem = mid;
3214            }
3215        } else {
3216            if (srcElem1 != (Element)srcElem1) {
3217                destElem = mask(sizeof(Element) * 8 - 1);
3218                fpscr.qc = 1;
3219            } else {
3220                destElem = srcElem1;
3221            }
3222        }
3223        FpscrQc = fpscr;
3224    '''
3225    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3226                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3227
3228    vqrshrunsCode = '''
3229        FPSCR fpscr = (FPSCR) FpscrQc;
3230        if (imm > sizeof(srcElem1) * 8) {
3231            if (srcElem1 != 0)
3232                fpscr.qc = 1;
3233            destElem = 0;
3234        } else if (imm) {
3235            BigElement mid = (srcElem1 >> (imm - 1));
3236            uint64_t rBit = mid & 0x1;
3237            mid >>= 1;
3238            mid |= -(mid & ((BigElement)1 <<
3239                            (sizeof(BigElement) * 8 - 1 - imm)));
3240            mid += rBit;
3241            if (bits(mid, sizeof(BigElement) * 8 - 1,
3242                          sizeof(Element) * 8) != 0) {
3243                if (srcElem1 < 0) {
3244                    destElem = 0;
3245                } else {
3246                    destElem = mask(sizeof(Element) * 8);
3247                }
3248                fpscr.qc = 1;
3249            } else {
3250                destElem = mid;
3251            }
3252        } else {
3253            if (srcElem1 < 0) {
3254                fpscr.qc = 1;
3255                destElem = 0;
3256            } else {
3257                destElem = srcElem1;
3258            }
3259        }
3260        FpscrQc = fpscr;
3261    '''
3262    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3263                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3264
3265    vshllCode = '''
3266        if (imm >= sizeof(destElem) * 8) {
3267            destElem = 0;
3268        } else {
3269            destElem = (BigElement)srcElem1 << imm;
3270        }
3271    '''
3272    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3273
3274    vmovlCode = '''
3275        destElem = srcElem1;
3276    '''
3277    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3278
3279    vcvt2ufxCode = '''
3280        FPSCR fpscr = (FPSCR) FpscrExc;
3281        if (flushToZero(srcElem1))
3282            fpscr.idc = 1;
3283        VfpSavedState state = prepFpState(VfpRoundNearest);
3284        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3285        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3286        __asm__ __volatile__("" :: "m" (destReg));
3287        finishVfp(fpscr, state, true);
3288        FpscrExc = fpscr;
3289    '''
3290    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3291            2, vcvt2ufxCode, toInt = True)
3292    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3293            4, vcvt2ufxCode, toInt = True)
3294
3295    vcvt2sfxCode = '''
3296        FPSCR fpscr = (FPSCR) FpscrExc;
3297        if (flushToZero(srcElem1))
3298            fpscr.idc = 1;
3299        VfpSavedState state = prepFpState(VfpRoundNearest);
3300        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3301        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3302        __asm__ __volatile__("" :: "m" (destReg));
3303        finishVfp(fpscr, state, true);
3304        FpscrExc = fpscr;
3305    '''
3306    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3307            2, vcvt2sfxCode, toInt = True)
3308    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3309            4, vcvt2sfxCode, toInt = True)
3310
3311    vcvtu2fpCode = '''
3312        FPSCR fpscr = (FPSCR) FpscrExc;
3313        VfpSavedState state = prepFpState(VfpRoundNearest);
3314        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3315        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3316        __asm__ __volatile__("" :: "m" (destElem));
3317        finishVfp(fpscr, state, true);
3318        FpscrExc = fpscr;
3319    '''
3320    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3321            2, vcvtu2fpCode, fromInt = True)
3322    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3323            4, vcvtu2fpCode, fromInt = True)
3324
3325    vcvts2fpCode = '''
3326        FPSCR fpscr = (FPSCR) FpscrExc;
3327        VfpSavedState state = prepFpState(VfpRoundNearest);
3328        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3329        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3330        __asm__ __volatile__("" :: "m" (destElem));
3331        finishVfp(fpscr, state, true);
3332        FpscrExc = fpscr;
3333    '''
3334    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3335            2, vcvts2fpCode, fromInt = True)
3336    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3337            4, vcvts2fpCode, fromInt = True)
3338
3339    vcvts2hCode = '''
3340        destElem = 0;
3341        FPSCR fpscr = (FPSCR) FpscrExc;
3342        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3343        if (flushToZero(srcFp1))
3344            fpscr.idc = 1;
3345        VfpSavedState state = prepFpState(VfpRoundNearest);
3346        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3347                                : "m" (srcFp1), "m" (destElem));
3348        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3349                              fpscr.ahp, srcFp1);
3350        __asm__ __volatile__("" :: "m" (destElem));
3351        finishVfp(fpscr, state, true);
3352        FpscrExc = fpscr;
3353    '''
3354    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3355
3356    vcvth2sCode = '''
3357        destElem = 0;
3358        FPSCR fpscr = (FPSCR) FpscrExc;
3359        VfpSavedState state = prepFpState(VfpRoundNearest);
3360        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3361                                : "m" (srcElem1), "m" (destElem));
3362        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3363        __asm__ __volatile__("" :: "m" (destElem));
3364        finishVfp(fpscr, state, true);
3365        FpscrExc = fpscr;
3366    '''
3367    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3368
3369    vrsqrteCode = '''
3370        destElem = unsignedRSqrtEstimate(srcElem1);
3371    '''
3372    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3373    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3374
3375    vrsqrtefpCode = '''
3376        FPSCR fpscr = (FPSCR) FpscrExc;
3377        if (flushToZero(srcReg1))
3378            fpscr.idc = 1;
3379        destReg = fprSqrtEstimate(fpscr, srcReg1);
3380        FpscrExc = fpscr;
3381    '''
3382    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3383    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3384
3385    vrecpeCode = '''
3386        destElem = unsignedRecipEstimate(srcElem1);
3387    '''
3388    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3389    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3390
3391    vrecpefpCode = '''
3392        FPSCR fpscr = (FPSCR) FpscrExc;
3393        if (flushToZero(srcReg1))
3394            fpscr.idc = 1;
3395        destReg = fpRecipEstimate(fpscr, srcReg1);
3396        FpscrExc = fpscr;
3397    '''
3398    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3399    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3400
3401    vrev16Code = '''
3402        destElem = srcElem1;
3403        unsigned groupSize = ((1 << 1) / sizeof(Element));
3404        unsigned reverseMask = (groupSize - 1);
3405        j = i ^ reverseMask;
3406    '''
3407    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3408    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3409    vrev32Code = '''
3410        destElem = srcElem1;
3411        unsigned groupSize = ((1 << 2) / sizeof(Element));
3412        unsigned reverseMask = (groupSize - 1);
3413        j = i ^ reverseMask;
3414    '''
3415    twoRegMiscInst("vrev32", "NVrev32D",
3416            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3417    twoRegMiscInst("vrev32", "NVrev32Q",
3418            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3419    vrev64Code = '''
3420        destElem = srcElem1;
3421        unsigned groupSize = ((1 << 3) / sizeof(Element));
3422        unsigned reverseMask = (groupSize - 1);
3423        j = i ^ reverseMask;
3424    '''
3425    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3426    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3427
3428    split('exec')
3429    exec_output += vcompares + vcomparesL
3430
3431    vpaddlCode = '''
3432        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3433    '''
3434    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3435    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3436
3437    vpadalCode = '''
3438        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3439    '''
3440    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3441    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3442
3443    vclsCode = '''
3444        unsigned count = 0;
3445        if (srcElem1 < 0) {
3446            srcElem1 <<= 1;
3447            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3448                count++;
3449                srcElem1 <<= 1;
3450            }
3451        } else {
3452            srcElem1 <<= 1;
3453            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3454                count++;
3455                srcElem1 <<= 1;
3456            }
3457        }
3458        destElem = count;
3459    '''
3460    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3461    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3462
3463    vclzCode = '''
3464        unsigned count = 0;
3465        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3466            count++;
3467            srcElem1 <<= 1;
3468        }
3469        destElem = count;
3470    '''
3471    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3472    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3473
3474    vcntCode = '''
3475        unsigned count = 0;
3476        while (srcElem1 && count < sizeof(Element) * 8) {
3477            count += srcElem1 & 0x1;
3478            srcElem1 >>= 1;
3479        }
3480        destElem = count;
3481    '''
3482
3483    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3484    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3485
3486    vmvnCode = '''
3487        destElem = ~srcElem1;
3488    '''
3489    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3490    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3491
3492    vqabsCode = '''
3493        FPSCR fpscr = (FPSCR) FpscrQc;
3494        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3495            fpscr.qc = 1;
3496            destElem = ~srcElem1;
3497        } else if (srcElem1 < 0) {
3498            destElem = -srcElem1;
3499        } else {
3500            destElem = srcElem1;
3501        }
3502        FpscrQc = fpscr;
3503    '''
3504    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3505    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3506
3507    vqnegCode = '''
3508        FPSCR fpscr = (FPSCR) FpscrQc;
3509        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
3510            fpscr.qc = 1;
3511            destElem = ~srcElem1;
3512        } else {
3513            destElem = -srcElem1;
3514        }
3515        FpscrQc = fpscr;
3516    '''
3517    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3518    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3519
3520    vabsCode = '''
3521        if (srcElem1 < 0) {
3522            destElem = -srcElem1;
3523        } else {
3524            destElem = srcElem1;
3525        }
3526    '''
3527
3528    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3529    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3530    vabsfpCode = '''
3531        union
3532        {
3533            uint32_t i;
3534            float f;
3535        } cStruct;
3536        cStruct.f = srcReg1;
3537        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3538        destReg = cStruct.f;
3539    '''
3540    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3541    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3542
3543    vnegCode = '''
3544        destElem = -srcElem1;
3545    '''
3546    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3547    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3548    vnegfpCode = '''
3549        destReg = -srcReg1;
3550    '''
3551    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3552    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3553
3554    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3555    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3556    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3557    vcgtfpCode = '''
3558        FPSCR fpscr = (FPSCR) FpscrExc;
3559        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
3560                             true, true, VfpRoundNearest);
3561        destReg = (res == 0) ? -1 : 0;
3562        if (res == 2.0)
3563            fpscr.ioc = 1;
3564        FpscrExc = fpscr;
3565    '''
3566    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3567            2, vcgtfpCode, toInt = True)
3568    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3569            4, vcgtfpCode, toInt = True)
3570
3571    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3572    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3573    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3574    vcgefpCode = '''
3575        FPSCR fpscr = (FPSCR) FpscrExc;
3576        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
3577                             true, true, VfpRoundNearest);
3578        destReg = (res == 0) ? -1 : 0;
3579        if (res == 2.0)
3580            fpscr.ioc = 1;
3581        FpscrExc = fpscr;
3582    '''
3583    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3584            2, vcgefpCode, toInt = True)
3585    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3586            4, vcgefpCode, toInt = True)
3587
3588    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3589    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3590    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3591    vceqfpCode = '''
3592        FPSCR fpscr = (FPSCR) FpscrExc;
3593        float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
3594                             true, true, VfpRoundNearest);
3595        destReg = (res == 0) ? -1 : 0;
3596        if (res == 2.0)
3597            fpscr.ioc = 1;
3598        FpscrExc = fpscr;
3599    '''
3600    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3601            2, vceqfpCode, toInt = True)
3602    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3603            4, vceqfpCode, toInt = True)
3604
3605    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3606    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3607    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3608    vclefpCode = '''
3609        FPSCR fpscr = (FPSCR) FpscrExc;
3610        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
3611                             true, true, VfpRoundNearest);
3612        destReg = (res == 0) ? -1 : 0;
3613        if (res == 2.0)
3614            fpscr.ioc = 1;
3615        FpscrExc = fpscr;
3616    '''
3617    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3618            2, vclefpCode, toInt = True)
3619    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3620            4, vclefpCode, toInt = True)
3621
3622    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3623    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3624    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3625    vcltfpCode = '''
3626        FPSCR fpscr = (FPSCR) FpscrExc;
3627        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
3628                             true, true, VfpRoundNearest);
3629        destReg = (res == 0) ? -1 : 0;
3630        if (res == 2.0)
3631            fpscr.ioc = 1;
3632        FpscrExc = fpscr;
3633    '''
3634    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3635            2, vcltfpCode, toInt = True)
3636    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3637            4, vcltfpCode, toInt = True)
3638
3639    vswpCode = '''
3640        uint32_t mid;
3641        for (unsigned r = 0; r < rCount; r++) {
3642            mid = srcReg1.regs[r];
3643            srcReg1.regs[r] = destReg.regs[r];
3644            destReg.regs[r] = mid;
3645        }
3646    '''
3647    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3648    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3649
3650    vtrnCode = '''
3651        Element mid;
3652        for (unsigned i = 0; i < eCount; i += 2) {
3653            mid = srcReg1.elements[i];
3654            srcReg1.elements[i] = destReg.elements[i + 1];
3655            destReg.elements[i + 1] = mid;
3656        }
3657    '''
3658    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3659            smallUnsignedTypes, 2, vtrnCode)
3660    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3661            smallUnsignedTypes, 4, vtrnCode)
3662
3663    vuzpCode = '''
3664        Element mid[eCount];
3665        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3666        for (unsigned i = 0; i < eCount / 2; i++) {
3667            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3668            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3669            destReg.elements[i] = destReg.elements[2 * i];
3670        }
3671        for (unsigned i = 0; i < eCount / 2; i++) {
3672            destReg.elements[eCount / 2 + i] = mid[2 * i];
3673        }
3674    '''
3675    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3676    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3677
3678    vzipCode = '''
3679        Element mid[eCount];
3680        memcpy(&mid, &destReg, sizeof(destReg));
3681        for (unsigned i = 0; i < eCount / 2; i++) {
3682            destReg.elements[2 * i] = mid[i];
3683            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3684        }
3685        for (int i = 0; i < eCount / 2; i++) {
3686            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3687            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3688        }
3689    '''
3690    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3691    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3692
3693    vmovnCode = 'destElem = srcElem1;'
3694    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3695
3696    vdupCode = 'destElem = srcElem1;'
3697    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3698    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3699
3700    def vdupGprInst(name, Name, opClass, types, rCount):
3701        global header_output, exec_output
3702        eWalkCode = simdEnabledCheckCode + '''
3703        RegVect destReg;
3704        for (unsigned i = 0; i < eCount; i++) {
3705            destReg.elements[i] = htog((Element)Op1);
3706        }
3707        '''
3708        for reg in range(rCount):
3709            eWalkCode += '''
3710            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3711            ''' % { "reg" : reg }
3712        iop = InstObjParams(name, Name,
3713                            "RegRegOp",
3714                            { "code": eWalkCode,
3715                              "r_count": rCount,
3716                              "predicate_test": predicateTest,
3717                              "op_class": opClass }, [])
3718        header_output += NeonRegRegOpDeclare.subst(iop)
3719        exec_output += NeonEqualRegExecute.subst(iop)
3720        for type in types:
3721            substDict = { "targs" : type,
3722                          "class_name" : Name }
3723            exec_output += NeonExecDeclare.subst(substDict)
3724    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3725    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3726
3727    vmovCode = 'destElem = imm;'
3728    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3729    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3730
3731    vorrCode = 'destElem |= imm;'
3732    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3733    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3734
3735    vmvnCode = 'destElem = ~imm;'
3736    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3737    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3738
3739    vbicCode = 'destElem &= ~imm;'
3740    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3741    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3742
3743    vqmovnCode = '''
3744    FPSCR fpscr = (FPSCR) FpscrQc;
3745    destElem = srcElem1;
3746    if ((BigElement)destElem != srcElem1) {
3747        fpscr.qc = 1;
3748        destElem = mask(sizeof(Element) * 8 - 1);
3749        if (srcElem1 < 0)
3750            destElem = ~destElem;
3751    }
3752    FpscrQc = fpscr;
3753    '''
3754    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3755
3756    vqmovunCode = '''
3757    FPSCR fpscr = (FPSCR) FpscrQc;
3758    destElem = srcElem1;
3759    if ((BigElement)destElem != srcElem1) {
3760        fpscr.qc = 1;
3761        destElem = mask(sizeof(Element) * 8);
3762    }
3763    FpscrQc = fpscr;
3764    '''
3765    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3766            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3767
3768    vqmovunsCode = '''
3769    FPSCR fpscr = (FPSCR) FpscrQc;
3770    destElem = srcElem1;
3771    if (srcElem1 < 0 ||
3772            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3773        fpscr.qc = 1;
3774        destElem = mask(sizeof(Element) * 8);
3775        if (srcElem1 < 0)
3776            destElem = ~destElem;
3777    }
3778    FpscrQc = fpscr;
3779    '''
3780    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3781            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3782
3783    def buildVext(name, Name, opClass, types, rCount, op):
3784        global header_output, exec_output
3785        eWalkCode = simdEnabledCheckCode + '''
3786        RegVect srcReg1, srcReg2, destReg;
3787        '''
3788        for reg in range(rCount):
3789            eWalkCode += '''
3790                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3791                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3792            ''' % { "reg" : reg }
3793        eWalkCode += op
3794        for reg in range(rCount):
3795            eWalkCode += '''
3796            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3797            ''' % { "reg" : reg }
3798        iop = InstObjParams(name, Name,
3799                            "RegRegRegImmOp",
3800                            { "code": eWalkCode,
3801                              "r_count": rCount,
3802                              "predicate_test": predicateTest,
3803                              "op_class": opClass }, [])
3804        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3805        exec_output += NeonEqualRegExecute.subst(iop)
3806        for type in types:
3807            substDict = { "targs" : type,
3808                          "class_name" : Name }
3809            exec_output += NeonExecDeclare.subst(substDict)
3810
3811    vextCode = '''
3812        for (unsigned i = 0; i < eCount; i++) {
3813            unsigned index = i + imm;
3814            if (index < eCount) {
3815                destReg.elements[i] = srcReg1.elements[index];
3816            } else {
3817                index -= eCount;
3818                if (index >= eCount) {
3819                    fault = std::make_shared<UndefinedInstruction>(machInst,
3820                                                                   false,
3821                                                                   mnemonic);
3822                } else {
3823                    destReg.elements[i] = srcReg2.elements[index];
3824                }
3825            }
3826        }
3827    '''
3828    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3829    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3830
3831    def buildVtbxl(name, Name, opClass, length, isVtbl):
3832        global header_output, decoder_output, exec_output
3833        code = simdEnabledCheckCode + '''
3834            union
3835            {
3836                uint8_t bytes[32];
3837                uint32_t regs[8];
3838            } table;
3839
3840            union
3841            {
3842                uint8_t bytes[8];
3843                uint32_t regs[2];
3844            } destReg, srcReg2;
3845
3846            const unsigned length = %(length)d;
3847            const bool isVtbl = %(isVtbl)s;
3848
3849            srcReg2.regs[0] = htog(FpOp2P0_uw);
3850            srcReg2.regs[1] = htog(FpOp2P1_uw);
3851
3852            destReg.regs[0] = htog(FpDestP0_uw);
3853            destReg.regs[1] = htog(FpDestP1_uw);
3854        ''' % { "length" : length, "isVtbl" : isVtbl }
3855        for reg in range(8):
3856            if reg < length * 2:
3857                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3858                        { "reg" : reg }
3859            else:
3860                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3861        code += '''
3862        for (unsigned i = 0; i < sizeof(destReg); i++) {
3863            uint8_t index = srcReg2.bytes[i];
3864            if (index < 8 * length) {
3865                destReg.bytes[i] = table.bytes[index];
3866            } else {
3867                if (isVtbl)
3868                    destReg.bytes[i] = 0;
3869                // else destReg.bytes[i] unchanged
3870            }
3871        }
3872
3873        FpDestP0_uw = gtoh(destReg.regs[0]);
3874        FpDestP1_uw = gtoh(destReg.regs[1]);
3875        '''
3876        iop = InstObjParams(name, Name,
3877                            "RegRegRegOp",
3878                            { "code": code,
3879                              "predicate_test": predicateTest,
3880                              "op_class": opClass }, [])
3881        header_output += RegRegRegOpDeclare.subst(iop)
3882        decoder_output += RegRegRegOpConstructor.subst(iop)
3883        exec_output += PredOpExecute.subst(iop)
3884
3885    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3886    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3887    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3888    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3889
3890    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3891    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3892    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3893    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3894}};
3895