neon.isa revision 10829
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061let {{
    # Accumulators for the generated C++; the generator functions below
    # append to them through "global".
    header_output = ""
    exec_output = ""

    # C++ source for the floating point compare helpers. Each helper
    # returns 2.0 when its NaN test fires (std::isnan on either operand;
    # vceqFunc uses isSnan instead), 0.0 when the comparison holds and 1.0
    # when it does not.
    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    # Less-than / less-or-equal variants with the same return convention.
    # This string is only defined here; it is not appended to exec_output
    # by the statements in this part of the file.
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    # Absolute-value compares: same convention, applied to fabsf() of each
    # operand.
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    # Emit the greater/equal helpers ahead of the generated instructions.
    exec_output += vcompares + vacomparesG

    # Element type name tuples used as the "types" argument of the
    # generator functions. The "small" tuples leave out the 64 bit type.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes
1133
1134    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1135                          readDest=False, pairwise=False):
1136        global header_output, exec_output
1137        eWalkCode = simdEnabledCheckCode + '''
1138        RegVect srcReg1, srcReg2, destReg;
1139        '''
1140        for reg in range(rCount):
1141            eWalkCode += '''
1142                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1143                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1144            ''' % { "reg" : reg }
1145            if readDest:
1146                eWalkCode += '''
1147                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1148                ''' % { "reg" : reg }
1149        readDestCode = ''
1150        if readDest:
1151            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1152        if pairwise:
1153            eWalkCode += '''
1154            for (unsigned i = 0; i < eCount; i++) {
1155                Element srcElem1 = gtoh(2 * i < eCount ?
1156                                        srcReg1.elements[2 * i] :
1157                                        srcReg2.elements[2 * i - eCount]);
1158                Element srcElem2 = gtoh(2 * i < eCount ?
1159                                        srcReg1.elements[2 * i + 1] :
1160                                        srcReg2.elements[2 * i + 1 - eCount]);
1161                Element destElem;
1162                %(readDest)s
1163                %(op)s
1164                destReg.elements[i] = htog(destElem);
1165            }
1166            ''' % { "op" : op, "readDest" : readDestCode }
1167        else:
1168            eWalkCode += '''
1169            for (unsigned i = 0; i < eCount; i++) {
1170                Element srcElem1 = gtoh(srcReg1.elements[i]);
1171                Element srcElem2 = gtoh(srcReg2.elements[i]);
1172                Element destElem;
1173                %(readDest)s
1174                %(op)s
1175                destReg.elements[i] = htog(destElem);
1176            }
1177            ''' % { "op" : op, "readDest" : readDestCode }
1178        for reg in range(rCount):
1179            eWalkCode += '''
1180            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1181            ''' % { "reg" : reg }
1182        iop = InstObjParams(name, Name,
1183                            "RegRegRegOp",
1184                            { "code": eWalkCode,
1185                              "r_count": rCount,
1186                              "predicate_test": predicateTest,
1187                              "op_class": opClass }, [])
1188        header_output += NeonRegRegRegOpDeclare.subst(iop)
1189        exec_output += NeonEqualRegExecute.subst(iop)
1190        for type in types:
1191            substDict = { "targs" : type,
1192                          "class_name" : Name }
1193            exec_output += NeonExecDeclare.subst(substDict)
1194
1195    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1196                            readDest=False, pairwise=False, toInt=False):
1197        global header_output, exec_output
1198        eWalkCode = simdEnabledCheckCode + '''
1199        typedef FloatReg FloatVect[rCount];
1200        FloatVect srcRegs1, srcRegs2;
1201        '''
1202        if toInt:
1203            eWalkCode += 'RegVect destRegs;\n'
1204        else:
1205            eWalkCode += 'FloatVect destRegs;\n'
1206        for reg in range(rCount):
1207            eWalkCode += '''
1208                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1209                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1210            ''' % { "reg" : reg }
1211            if readDest:
1212                if toInt:
1213                    eWalkCode += '''
1214                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1215                    ''' % { "reg" : reg }
1216                else:
1217                    eWalkCode += '''
1218                        destRegs[%(reg)d] = FpDestP%(reg)d;
1219                    ''' % { "reg" : reg }
1220        readDestCode = ''
1221        if readDest:
1222            readDestCode = 'destReg = destRegs[r];'
1223        destType = 'FloatReg'
1224        writeDest = 'destRegs[r] = destReg;'
1225        if toInt:
1226            destType = 'FloatRegBits'
1227            writeDest = 'destRegs.regs[r] = destReg;'
1228        if pairwise:
1229            eWalkCode += '''
1230            for (unsigned r = 0; r < rCount; r++) {
1231                FloatReg srcReg1 = (2 * r < rCount) ?
1232                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1233                FloatReg srcReg2 = (2 * r < rCount) ?
1234                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1235                %(destType)s destReg;
1236                %(readDest)s
1237                %(op)s
1238                %(writeDest)s
1239            }
1240            ''' % { "op" : op,
1241                    "readDest" : readDestCode,
1242                    "destType" : destType,
1243                    "writeDest" : writeDest }
1244        else:
1245            eWalkCode += '''
1246            for (unsigned r = 0; r < rCount; r++) {
1247                FloatReg srcReg1 = srcRegs1[r];
1248                FloatReg srcReg2 = srcRegs2[r];
1249                %(destType)s destReg;
1250                %(readDest)s
1251                %(op)s
1252                %(writeDest)s
1253            }
1254            ''' % { "op" : op,
1255                    "readDest" : readDestCode,
1256                    "destType" : destType,
1257                    "writeDest" : writeDest }
1258        for reg in range(rCount):
1259            if toInt:
1260                eWalkCode += '''
1261                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1262                ''' % { "reg" : reg }
1263            else:
1264                eWalkCode += '''
1265                FpDestP%(reg)d = destRegs[%(reg)d];
1266                ''' % { "reg" : reg }
1267        iop = InstObjParams(name, Name,
1268                            "FpRegRegRegOp",
1269                            { "code": eWalkCode,
1270                              "r_count": rCount,
1271                              "predicate_test": predicateTest,
1272                              "op_class": opClass }, [])
1273        header_output += NeonRegRegRegOpDeclare.subst(iop)
1274        exec_output += NeonEqualRegExecute.subst(iop)
1275        for type in types:
1276            substDict = { "targs" : type,
1277                          "class_name" : Name }
1278            exec_output += NeonExecDeclare.subst(substDict)
1279
1280    def threeUnequalRegInst(name, Name, opClass, types, op,
1281                            bigSrc1, bigSrc2, bigDest, readDest):
1282        global header_output, exec_output
1283        src1Cnt = src2Cnt = destCnt = 2
1284        src1Prefix = src2Prefix = destPrefix = ''
1285        if bigSrc1:
1286            src1Cnt = 4
1287            src1Prefix = 'Big'
1288        if bigSrc2:
1289            src2Cnt = 4
1290            src2Prefix = 'Big'
1291        if bigDest:
1292            destCnt = 4
1293            destPrefix = 'Big'
1294        eWalkCode = simdEnabledCheckCode + '''
1295            %sRegVect srcReg1;
1296            %sRegVect srcReg2;
1297            %sRegVect destReg;
1298        ''' % (src1Prefix, src2Prefix, destPrefix)
1299        for reg in range(src1Cnt):
1300            eWalkCode += '''
1301                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1302            ''' % { "reg" : reg }
1303        for reg in range(src2Cnt):
1304            eWalkCode += '''
1305                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1306            ''' % { "reg" : reg }
1307        if readDest:
1308            for reg in range(destCnt):
1309                eWalkCode += '''
1310                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1311                ''' % { "reg" : reg }
1312        readDestCode = ''
1313        if readDest:
1314            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1315        eWalkCode += '''
1316        for (unsigned i = 0; i < eCount; i++) {
1317            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1318            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1319            %(destPrefix)sElement destElem;
1320            %(readDest)s
1321            %(op)s
1322            destReg.elements[i] = htog(destElem);
1323        }
1324        ''' % { "op" : op, "readDest" : readDestCode,
1325                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1326                "destPrefix" : destPrefix }
1327        for reg in range(destCnt):
1328            eWalkCode += '''
1329            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1330            ''' % { "reg" : reg }
1331        iop = InstObjParams(name, Name,
1332                            "RegRegRegOp",
1333                            { "code": eWalkCode,
1334                              "r_count": 2,
1335                              "predicate_test": predicateTest,
1336                              "op_class": opClass }, [])
1337        header_output += NeonRegRegRegOpDeclare.subst(iop)
1338        exec_output += NeonUnequalRegExecute.subst(iop)
1339        for type in types:
1340            substDict = { "targs" : type,
1341                          "class_name" : Name }
1342            exec_output += NeonExecDeclare.subst(substDict)
1343
1344    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1345        threeUnequalRegInst(name, Name, opClass, types, op,
1346                            True, True, False, readDest)
1347
1348    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1349        threeUnequalRegInst(name, Name, opClass, types, op,
1350                            False, False, True, readDest)
1351
1352    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1353        threeUnequalRegInst(name, Name, opClass, types, op,
1354                            True, False, True, readDest)
1355
1356    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1357        global header_output, exec_output
1358        eWalkCode = simdEnabledCheckCode + '''
1359        RegVect srcReg1, srcReg2, destReg;
1360        '''
1361        for reg in range(rCount):
1362            eWalkCode += '''
1363                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1364                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1365            ''' % { "reg" : reg }
1366            if readDest:
1367                eWalkCode += '''
1368                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1369                ''' % { "reg" : reg }
1370        readDestCode = ''
1371        if readDest:
1372            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1373        eWalkCode += '''
1374        if (imm < 0 && imm >= eCount) {
1375            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1376                                                           mnemonic);
1377        } else {
1378            for (unsigned i = 0; i < eCount; i++) {
1379                Element srcElem1 = gtoh(srcReg1.elements[i]);
1380                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1381                Element destElem;
1382                %(readDest)s
1383                %(op)s
1384                destReg.elements[i] = htog(destElem);
1385            }
1386        }
1387        ''' % { "op" : op, "readDest" : readDestCode }
1388        for reg in range(rCount):
1389            eWalkCode += '''
1390            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1391            ''' % { "reg" : reg }
1392        iop = InstObjParams(name, Name,
1393                            "RegRegRegImmOp",
1394                            { "code": eWalkCode,
1395                              "r_count": rCount,
1396                              "predicate_test": predicateTest,
1397                              "op_class": opClass }, [])
1398        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1399        exec_output += NeonEqualRegExecute.subst(iop)
1400        for type in types:
1401            substDict = { "targs" : type,
1402                          "class_name" : Name }
1403            exec_output += NeonExecDeclare.subst(substDict)
1404
1405    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1406        global header_output, exec_output
1407        rCount = 2
1408        eWalkCode = simdEnabledCheckCode + '''
1409        RegVect srcReg1, srcReg2;
1410        BigRegVect destReg;
1411        '''
1412        for reg in range(rCount):
1413            eWalkCode += '''
1414                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1415                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1416            ''' % { "reg" : reg }
1417        if readDest:
1418            for reg in range(2 * rCount):
1419                eWalkCode += '''
1420                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1421                ''' % { "reg" : reg }
1422        readDestCode = ''
1423        if readDest:
1424            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1425        eWalkCode += '''
1426        if (imm < 0 && imm >= eCount) {
1427            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1428                                                          mnemonic);
1429        } else {
1430            for (unsigned i = 0; i < eCount; i++) {
1431                Element srcElem1 = gtoh(srcReg1.elements[i]);
1432                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1433                BigElement destElem;
1434                %(readDest)s
1435                %(op)s
1436                destReg.elements[i] = htog(destElem);
1437            }
1438        }
1439        ''' % { "op" : op, "readDest" : readDestCode }
1440        for reg in range(2 * rCount):
1441            eWalkCode += '''
1442            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1443            ''' % { "reg" : reg }
1444        iop = InstObjParams(name, Name,
1445                            "RegRegRegImmOp",
1446                            { "code": eWalkCode,
1447                              "r_count": rCount,
1448                              "predicate_test": predicateTest,
1449                              "op_class": opClass }, [])
1450        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1451        exec_output += NeonUnequalRegExecute.subst(iop)
1452        for type in types:
1453            substDict = { "targs" : type,
1454                          "class_name" : Name }
1455            exec_output += NeonExecDeclare.subst(substDict)
1456
1457    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1458        global header_output, exec_output
1459        eWalkCode = simdEnabledCheckCode + '''
1460        typedef FloatReg FloatVect[rCount];
1461        FloatVect srcRegs1, srcRegs2, destRegs;
1462        '''
1463        for reg in range(rCount):
1464            eWalkCode += '''
1465                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1466                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1467            ''' % { "reg" : reg }
1468            if readDest:
1469                eWalkCode += '''
1470                    destRegs[%(reg)d] = FpDestP%(reg)d;
1471                ''' % { "reg" : reg }
1472        readDestCode = ''
1473        if readDest:
1474            readDestCode = 'destReg = destRegs[i];'
1475        eWalkCode += '''
1476        if (imm < 0 && imm >= eCount) {
1477            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1478                                                           mnemonic);
1479        } else {
1480            for (unsigned i = 0; i < rCount; i++) {
1481                FloatReg srcReg1 = srcRegs1[i];
1482                FloatReg srcReg2 = srcRegs2[imm];
1483                FloatReg destReg;
1484                %(readDest)s
1485                %(op)s
1486                destRegs[i] = destReg;
1487            }
1488        }
1489        ''' % { "op" : op, "readDest" : readDestCode }
1490        for reg in range(rCount):
1491            eWalkCode += '''
1492            FpDestP%(reg)d = destRegs[%(reg)d];
1493            ''' % { "reg" : reg }
1494        iop = InstObjParams(name, Name,
1495                            "FpRegRegRegImmOp",
1496                            { "code": eWalkCode,
1497                              "r_count": rCount,
1498                              "predicate_test": predicateTest,
1499                              "op_class": opClass }, [])
1500        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1501        exec_output += NeonEqualRegExecute.subst(iop)
1502        for type in types:
1503            substDict = { "targs" : type,
1504                          "class_name" : Name }
1505            exec_output += NeonExecDeclare.subst(substDict)
1506
1507    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1508            readDest=False, toInt=False, fromInt=False):
1509        global header_output, exec_output
1510        eWalkCode = simdEnabledCheckCode + '''
1511        RegVect srcRegs1, destRegs;
1512        '''
1513        for reg in range(rCount):
1514            eWalkCode += '''
1515                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1516            ''' % { "reg" : reg }
1517            if readDest:
1518                eWalkCode += '''
1519                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1520                ''' % { "reg" : reg }
1521        readDestCode = ''
1522        if readDest:
1523            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1524            if toInt:
1525                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1526        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1527        if fromInt:
1528            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1529        declDest = 'Element destElem;'
1530        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1531        if toInt:
1532            declDest = 'FloatRegBits destReg;'
1533            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1534        eWalkCode += '''
1535        for (unsigned i = 0; i < eCount; i++) {
1536            %(readOp)s
1537            %(declDest)s
1538            %(readDest)s
1539            %(op)s
1540            %(writeDest)s
1541        }
1542        ''' % { "readOp" : readOpCode,
1543                "declDest" : declDest,
1544                "readDest" : readDestCode,
1545                "op" : op,
1546                "writeDest" : writeDestCode }
1547        for reg in range(rCount):
1548            eWalkCode += '''
1549            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1550            ''' % { "reg" : reg }
1551        iop = InstObjParams(name, Name,
1552                            "RegRegImmOp",
1553                            { "code": eWalkCode,
1554                              "r_count": rCount,
1555                              "predicate_test": predicateTest,
1556                              "op_class": opClass }, [])
1557        header_output += NeonRegRegImmOpDeclare.subst(iop)
1558        exec_output += NeonEqualRegExecute.subst(iop)
1559        for type in types:
1560            substDict = { "targs" : type,
1561                          "class_name" : Name }
1562            exec_output += NeonExecDeclare.subst(substDict)
1563
1564    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1565        global header_output, exec_output
1566        eWalkCode = simdEnabledCheckCode + '''
1567        BigRegVect srcReg1;
1568        RegVect destReg;
1569        '''
1570        for reg in range(4):
1571            eWalkCode += '''
1572                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1573            ''' % { "reg" : reg }
1574        if readDest:
1575            for reg in range(2):
1576                eWalkCode += '''
1577                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1578                ''' % { "reg" : reg }
1579        readDestCode = ''
1580        if readDest:
1581            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1582        eWalkCode += '''
1583        for (unsigned i = 0; i < eCount; i++) {
1584            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1585            Element destElem;
1586            %(readDest)s
1587            %(op)s
1588            destReg.elements[i] = htog(destElem);
1589        }
1590        ''' % { "op" : op, "readDest" : readDestCode }
1591        for reg in range(2):
1592            eWalkCode += '''
1593            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1594            ''' % { "reg" : reg }
1595        iop = InstObjParams(name, Name,
1596                            "RegRegImmOp",
1597                            { "code": eWalkCode,
1598                              "r_count": 2,
1599                              "predicate_test": predicateTest,
1600                              "op_class": opClass }, [])
1601        header_output += NeonRegRegImmOpDeclare.subst(iop)
1602        exec_output += NeonUnequalRegExecute.subst(iop)
1603        for type in types:
1604            substDict = { "targs" : type,
1605                          "class_name" : Name }
1606            exec_output += NeonExecDeclare.subst(substDict)
1607
1608    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1609        global header_output, exec_output
1610        eWalkCode = simdEnabledCheckCode + '''
1611        RegVect srcReg1;
1612        BigRegVect destReg;
1613        '''
1614        for reg in range(2):
1615            eWalkCode += '''
1616                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1617            ''' % { "reg" : reg }
1618        if readDest:
1619            for reg in range(4):
1620                eWalkCode += '''
1621                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1622                ''' % { "reg" : reg }
1623        readDestCode = ''
1624        if readDest:
1625            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1626        eWalkCode += '''
1627        for (unsigned i = 0; i < eCount; i++) {
1628            Element srcElem1 = gtoh(srcReg1.elements[i]);
1629            BigElement destElem;
1630            %(readDest)s
1631            %(op)s
1632            destReg.elements[i] = htog(destElem);
1633        }
1634        ''' % { "op" : op, "readDest" : readDestCode }
1635        for reg in range(4):
1636            eWalkCode += '''
1637            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1638            ''' % { "reg" : reg }
1639        iop = InstObjParams(name, Name,
1640                            "RegRegImmOp",
1641                            { "code": eWalkCode,
1642                              "r_count": 2,
1643                              "predicate_test": predicateTest,
1644                              "op_class": opClass }, [])
1645        header_output += NeonRegRegImmOpDeclare.subst(iop)
1646        exec_output += NeonUnequalRegExecute.subst(iop)
1647        for type in types:
1648            substDict = { "targs" : type,
1649                          "class_name" : Name }
1650            exec_output += NeonExecDeclare.subst(substDict)
1651
1652    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1653        global header_output, exec_output
1654        eWalkCode = simdEnabledCheckCode + '''
1655        RegVect srcReg1, destReg;
1656        '''
1657        for reg in range(rCount):
1658            eWalkCode += '''
1659                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1660            ''' % { "reg" : reg }
1661            if readDest:
1662                eWalkCode += '''
1663                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1664                ''' % { "reg" : reg }
1665        readDestCode = ''
1666        if readDest:
1667            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1668        eWalkCode += '''
1669        for (unsigned i = 0; i < eCount; i++) {
1670            unsigned j = i;
1671            Element srcElem1 = gtoh(srcReg1.elements[i]);
1672            Element destElem;
1673            %(readDest)s
1674            %(op)s
1675            destReg.elements[j] = htog(destElem);
1676        }
1677        ''' % { "op" : op, "readDest" : readDestCode }
1678        for reg in range(rCount):
1679            eWalkCode += '''
1680            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1681            ''' % { "reg" : reg }
1682        iop = InstObjParams(name, Name,
1683                            "RegRegOp",
1684                            { "code": eWalkCode,
1685                              "r_count": rCount,
1686                              "predicate_test": predicateTest,
1687                              "op_class": opClass }, [])
1688        header_output += NeonRegRegOpDeclare.subst(iop)
1689        exec_output += NeonEqualRegExecute.subst(iop)
1690        for type in types:
1691            substDict = { "targs" : type,
1692                          "class_name" : Name }
1693            exec_output += NeonExecDeclare.subst(substDict)
1694
1695    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1696        global header_output, exec_output
1697        eWalkCode = simdEnabledCheckCode + '''
1698        RegVect srcReg1, destReg;
1699        '''
1700        for reg in range(rCount):
1701            eWalkCode += '''
1702                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1703            ''' % { "reg" : reg }
1704            if readDest:
1705                eWalkCode += '''
1706                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1707                ''' % { "reg" : reg }
1708        readDestCode = ''
1709        if readDest:
1710            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1711        eWalkCode += '''
1712        for (unsigned i = 0; i < eCount; i++) {
1713            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1714            Element destElem;
1715            %(readDest)s
1716            %(op)s
1717            destReg.elements[i] = htog(destElem);
1718        }
1719        ''' % { "op" : op, "readDest" : readDestCode }
1720        for reg in range(rCount):
1721            eWalkCode += '''
1722            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1723            ''' % { "reg" : reg }
1724        iop = InstObjParams(name, Name,
1725                            "RegRegImmOp",
1726                            { "code": eWalkCode,
1727                              "r_count": rCount,
1728                              "predicate_test": predicateTest,
1729                              "op_class": opClass }, [])
1730        header_output += NeonRegRegImmOpDeclare.subst(iop)
1731        exec_output += NeonEqualRegExecute.subst(iop)
1732        for type in types:
1733            substDict = { "targs" : type,
1734                          "class_name" : Name }
1735            exec_output += NeonExecDeclare.subst(substDict)
1736
1737    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1738        global header_output, exec_output
1739        eWalkCode = simdEnabledCheckCode + '''
1740        RegVect srcReg1, destReg;
1741        '''
1742        for reg in range(rCount):
1743            eWalkCode += '''
1744                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1745                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1746            ''' % { "reg" : reg }
1747            if readDest:
1748                eWalkCode += '''
1749                ''' % { "reg" : reg }
1750        readDestCode = ''
1751        if readDest:
1752            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1753        eWalkCode += op
1754        for reg in range(rCount):
1755            eWalkCode += '''
1756            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1757            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1758            ''' % { "reg" : reg }
1759        iop = InstObjParams(name, Name,
1760                            "RegRegOp",
1761                            { "code": eWalkCode,
1762                              "r_count": rCount,
1763                              "predicate_test": predicateTest,
1764                              "op_class": opClass }, [])
1765        header_output += NeonRegRegOpDeclare.subst(iop)
1766        exec_output += NeonEqualRegExecute.subst(iop)
1767        for type in types:
1768            substDict = { "targs" : type,
1769                          "class_name" : Name }
1770            exec_output += NeonExecDeclare.subst(substDict)
1771
1772    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1773            readDest=False, toInt=False):
1774        global header_output, exec_output
1775        eWalkCode = simdEnabledCheckCode + '''
1776        typedef FloatReg FloatVect[rCount];
1777        FloatVect srcRegs1;
1778        '''
1779        if toInt:
1780            eWalkCode += 'RegVect destRegs;\n'
1781        else:
1782            eWalkCode += 'FloatVect destRegs;\n'
1783        for reg in range(rCount):
1784            eWalkCode += '''
1785                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1786            ''' % { "reg" : reg }
1787            if readDest:
1788                if toInt:
1789                    eWalkCode += '''
1790                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1791                    ''' % { "reg" : reg }
1792                else:
1793                    eWalkCode += '''
1794                        destRegs[%(reg)d] = FpDestP%(reg)d;
1795                    ''' % { "reg" : reg }
1796        readDestCode = ''
1797        if readDest:
1798            readDestCode = 'destReg = destRegs[i];'
1799        destType = 'FloatReg'
1800        writeDest = 'destRegs[r] = destReg;'
1801        if toInt:
1802            destType = 'FloatRegBits'
1803            writeDest = 'destRegs.regs[r] = destReg;'
1804        eWalkCode += '''
1805        for (unsigned r = 0; r < rCount; r++) {
1806            FloatReg srcReg1 = srcRegs1[r];
1807            %(destType)s destReg;
1808            %(readDest)s
1809            %(op)s
1810            %(writeDest)s
1811        }
1812        ''' % { "op" : op,
1813                "readDest" : readDestCode,
1814                "destType" : destType,
1815                "writeDest" : writeDest }
1816        for reg in range(rCount):
1817            if toInt:
1818                eWalkCode += '''
1819                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1820                ''' % { "reg" : reg }
1821            else:
1822                eWalkCode += '''
1823                FpDestP%(reg)d = destRegs[%(reg)d];
1824                ''' % { "reg" : reg }
1825        iop = InstObjParams(name, Name,
1826                            "FpRegRegOp",
1827                            { "code": eWalkCode,
1828                              "r_count": rCount,
1829                              "predicate_test": predicateTest,
1830                              "op_class": opClass }, [])
1831        header_output += NeonRegRegOpDeclare.subst(iop)
1832        exec_output += NeonEqualRegExecute.subst(iop)
1833        for type in types:
1834            substDict = { "targs" : type,
1835                          "class_name" : Name }
1836            exec_output += NeonExecDeclare.subst(substDict)
1837
1838    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1839        global header_output, exec_output
1840        eWalkCode = simdEnabledCheckCode + '''
1841        RegVect srcRegs;
1842        BigRegVect destReg;
1843        '''
1844        for reg in range(rCount):
1845            eWalkCode += '''
1846                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1847            ''' % { "reg" : reg }
1848            if readDest:
1849                eWalkCode += '''
1850                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1851                ''' % { "reg" : reg }
1852        readDestCode = ''
1853        if readDest:
1854            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1855        eWalkCode += '''
1856        for (unsigned i = 0; i < eCount / 2; i++) {
1857            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1858            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1859            BigElement destElem;
1860            %(readDest)s
1861            %(op)s
1862            destReg.elements[i] = htog(destElem);
1863        }
1864        ''' % { "op" : op, "readDest" : readDestCode }
1865        for reg in range(rCount):
1866            eWalkCode += '''
1867            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1868            ''' % { "reg" : reg }
1869        iop = InstObjParams(name, Name,
1870                            "RegRegOp",
1871                            { "code": eWalkCode,
1872                              "r_count": rCount,
1873                              "predicate_test": predicateTest,
1874                              "op_class": opClass }, [])
1875        header_output += NeonRegRegOpDeclare.subst(iop)
1876        exec_output += NeonUnequalRegExecute.subst(iop)
1877        for type in types:
1878            substDict = { "targs" : type,
1879                          "class_name" : Name }
1880            exec_output += NeonExecDeclare.subst(substDict)
1881
1882    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1883        global header_output, exec_output
1884        eWalkCode = simdEnabledCheckCode + '''
1885        BigRegVect srcReg1;
1886        RegVect destReg;
1887        '''
1888        for reg in range(4):
1889            eWalkCode += '''
1890                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1891            ''' % { "reg" : reg }
1892        if readDest:
1893            for reg in range(2):
1894                eWalkCode += '''
1895                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1896                ''' % { "reg" : reg }
1897        readDestCode = ''
1898        if readDest:
1899            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1900        eWalkCode += '''
1901        for (unsigned i = 0; i < eCount; i++) {
1902            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1903            Element destElem;
1904            %(readDest)s
1905            %(op)s
1906            destReg.elements[i] = htog(destElem);
1907        }
1908        ''' % { "op" : op, "readDest" : readDestCode }
1909        for reg in range(2):
1910            eWalkCode += '''
1911            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1912            ''' % { "reg" : reg }
1913        iop = InstObjParams(name, Name,
1914                            "RegRegOp",
1915                            { "code": eWalkCode,
1916                              "r_count": 2,
1917                              "predicate_test": predicateTest,
1918                              "op_class": opClass }, [])
1919        header_output += NeonRegRegOpDeclare.subst(iop)
1920        exec_output += NeonUnequalRegExecute.subst(iop)
1921        for type in types:
1922            substDict = { "targs" : type,
1923                          "class_name" : Name }
1924            exec_output += NeonExecDeclare.subst(substDict)
1925
1926    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1927        global header_output, exec_output
1928        eWalkCode = simdEnabledCheckCode + '''
1929        RegVect destReg;
1930        '''
1931        if readDest:
1932            for reg in range(rCount):
1933                eWalkCode += '''
1934                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1935                ''' % { "reg" : reg }
1936        readDestCode = ''
1937        if readDest:
1938            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1939        eWalkCode += '''
1940        for (unsigned i = 0; i < eCount; i++) {
1941            Element destElem;
1942            %(readDest)s
1943            %(op)s
1944            destReg.elements[i] = htog(destElem);
1945        }
1946        ''' % { "op" : op, "readDest" : readDestCode }
1947        for reg in range(rCount):
1948            eWalkCode += '''
1949            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1950            ''' % { "reg" : reg }
1951        iop = InstObjParams(name, Name,
1952                            "RegImmOp",
1953                            { "code": eWalkCode,
1954                              "r_count": rCount,
1955                              "predicate_test": predicateTest,
1956                              "op_class": opClass }, [])
1957        header_output += NeonRegImmOpDeclare.subst(iop)
1958        exec_output += NeonEqualRegExecute.subst(iop)
1959        for type in types:
1960            substDict = { "targs" : type,
1961                          "class_name" : Name }
1962            exec_output += NeonExecDeclare.subst(substDict)
1963
1964    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1965        global header_output, exec_output
1966        eWalkCode = simdEnabledCheckCode + '''
1967        RegVect srcReg1;
1968        BigRegVect destReg;
1969        '''
1970        for reg in range(2):
1971            eWalkCode += '''
1972                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1973            ''' % { "reg" : reg }
1974        if readDest:
1975            for reg in range(4):
1976                eWalkCode += '''
1977                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1978                ''' % { "reg" : reg }
1979        readDestCode = ''
1980        if readDest:
1981            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1982        eWalkCode += '''
1983        for (unsigned i = 0; i < eCount; i++) {
1984            Element srcElem1 = gtoh(srcReg1.elements[i]);
1985            BigElement destElem;
1986            %(readDest)s
1987            %(op)s
1988            destReg.elements[i] = htog(destElem);
1989        }
1990        ''' % { "op" : op, "readDest" : readDestCode }
1991        for reg in range(4):
1992            eWalkCode += '''
1993            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1994            ''' % { "reg" : reg }
1995        iop = InstObjParams(name, Name,
1996                            "RegRegOp",
1997                            { "code": eWalkCode,
1998                              "r_count": 2,
1999                              "predicate_test": predicateTest,
2000                              "op_class": opClass }, [])
2001        header_output += NeonRegRegOpDeclare.subst(iop)
2002        exec_output += NeonUnequalRegExecute.subst(iop)
2003        for type in types:
2004            substDict = { "targs" : type,
2005                          "class_name" : Name }
2006            exec_output += NeonExecDeclare.subst(substDict)
2007
    # Halving add: each operand is halved after clearing its low bit, the
    # halves are added, and carryBit restores the carry out of the two low
    # bits, so the intermediate sum never needs more bits than Element has.
    # Each snippet in this section is registered twice, under a "...D" class
    # with register count 2 and a "...Q" class with register count 4.
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # Rounding halving add: identical to vhaddCode except that 1 is added to
    # the sum of the low bits before it is shifted, which rounds the halved
    # result up.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # Halving subtract: the halved operands are subtracted and barrowBit
    # (the borrow, i.e. 1 when srcElem1's low bit is 0 and srcElem2's is 1)
    # is subtracted from the difference.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2045
    # Bitwise operations.  All of them are instantiated for unsignedTypes
    # only and registered as a "...D" class (register count 2) and a "...Q"
    # class (register count 4).

    # AND of the two source elements.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    # Bit clear: srcElem1 with every bit that is set in srcElem2 cleared.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    # OR of the two source elements.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov reuses the OR snippet under the "SimdMiscOp" class.
    # NOTE(review): presumably both sources name the same register so the OR
    # is a plain copy -- confirm against the decoder.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    # OR of srcElem1 with the complement of srcElem2.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    # Exclusive OR of the two source elements.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # The three bit-select snippets below read destElem as an input, so they
    # pass a trailing True (presumably the helper's readDest flag, as in the
    # generator functions in this file -- confirm against threeEqualRegInst).

    # Keep destElem where srcElem2 has a 1 bit, take srcElem1 where it has 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # Take srcElem1 where srcElem2 has a 1 bit, keep destElem where it has 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # The old destElem is the mask: srcElem1 where it has a 1 bit, srcElem2
    # where it has 0.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2094
    # Element-wise maximum: srcElem1 when it is strictly greater, otherwise
    # srcElem2.  Instantiated for allTypes.
    vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

    # Element-wise minimum: srcElem1 when it is strictly smaller, otherwise
    # srcElem2.  Instantiated for allTypes.
    vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2106
    # Plain element-wise addition (wraps in the Element type).
    vaddCode = '''
        destElem = srcElem1 + srcElem2;
    '''
    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

    # vpadd reuses the add snippet with pairwise=True; only a register
    # count 2 variant is registered.
    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                      2, vaddCode, pairwise=True)
    # Addition carried out in BigElement; shared by the long (vaddl) and
    # wide (vaddw) helpers.
    vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # Add in BigElement, then shift right by the bit width of Element.
    vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # As vaddhnCode, but 1 << (bit width of Element - 1) is added before the
    # shift so the discarded low half rounds the result.
    vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

    # Plain element-wise subtraction (wraps in the Element type).
    vsubCode = '''
        destElem = srcElem1 - srcElem2;
    '''
    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # Subtraction carried out in BigElement; shared by the long (vsubl) and
    # wide (vsubw) helpers.
    vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    '''
    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

    # Unsigned saturating add: if the wrapped sum is smaller than either
    # operand the add overflowed, so the result becomes all ones and the
    # qc bit of the FPSCR value read from FpscrQc is set.
    vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # Subtract in BigElement, then shift right by the bit width of Element.
    vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # As vsubhnCode, but 1 << (bit width of Element - 1) is added before the
    # shift so the discarded low half rounds the result.
    vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2165
    # Signed saturating add.  Overflow is detected when both operands have
    # the same sign and the wrapped sum has the other one.  The result is
    # then 1 << (bit width - 1), and 1 is subtracted from that when the
    # wrapped sum was negative; the qc bit is set in either case.
    vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

    # Unsigned saturating subtract: a wrapped difference larger than
    # srcElem1 means the subtraction underflowed, so the result becomes 0
    # and the qc bit is set.
    vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

    # Signed saturating subtract.  Overflow is detected when the operands
    # have different signs (negSrc1 == posSrc2) and the wrapped difference
    # does not have srcElem1's sign.  Saturation values and the qc update
    # are the same as in vqaddSCode.
    vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2211
    # Comparisons produce an all-ones element when the condition holds and
    # 0 otherwise.

    # Greater than.
    vcgtCode = '''
        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

    # Greater than or equal.
    vcgeCode = '''
        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

    # Equality; instantiated for unsignedTypes only.
    vceqCode = '''
        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2229
    # Shift by a per-element register amount.  The amount is the low byte of
    # srcElem2 read as a signed value: negative amounts shift right, others
    # shift left.  A right shift by the element width or more yields 0
    # before the sign fix-up, which then ORs in the sign bits whenever
    # ltz(srcElem1) holds but the shifted value is not negative.  A left
    # shift by the element width or more yields 0.
    vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    '''
    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

    # Rounding variant of vshlCode.  For right shifts rBit is the last bit
    # shifted out (bit shiftAmt - 1 of srcElem1), or 1 when the amount
    # exceeds the element width and ltz(srcElem1) holds; it is added to the
    # sign-corrected shifted value.  A zero amount copies srcElem1.
    # NOTE(review): registered with "SimdAluOp" while vshl above uses
    # "SimdShiftOp" -- confirm this is intended.
    vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
    '''
    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2289
    # Unsigned saturating shift by a per-element register amount (low byte
    # of srcElem2, signed).  Negative amounts shift right with no
    # saturation.  A left shift saturates to all ones and sets the qc bit
    # when any of the bits that would be shifted out is set (or, for
    # amounts of at least the element width, when srcElem1 is non-zero).
    # A zero amount copies srcElem1.
    vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)

    # Signed saturating shift.  Right shifts get the same sign fix-up as
    # vshlCode.  A left shift saturates when the top shiftAmt + 1 bits of
    # srcElem1 are not all copies of its sign (or, for amounts of at least
    # the element width, when srcElem1 is non-zero).  The saturated value is
    # mask(bit width - 1), complemented when srcElem1 is negative, and the
    # qc bit is set.  A zero amount copies srcElem1.
    # NOTE(review): registered with "SimdCmpOp" while the unsigned variant
    # above uses "SimdAluOp" -- confirm this is intended.
    vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2371
# Per-element C++ body for the unsigned register-shift "vqrshl" forms
# (the rounding counterpart of vqshlUCode).
# The shift amount is the low byte of srcElem2 read as a signed value.
#   * negative: right shift by the magnitude.  rBit is bit (shiftAmt - 1) of
#     the source while the magnitude does not exceed the element width
#     (otherwise 0) and is added to the shifted value; the shifted value
#     itself is 0 once the magnitude reaches the element width.
#   * otherwise: left shift that writes mask(sizeof(Element) * 8) and sets
#     fpscr.qc when the amount reaches the element width with a non-zero
#     source or the bits(...) test on the top bit positions is non-zero.
# NOTE(review): unlike vqshlUCode there is no separate branch for a shift of
# zero, so that case reaches bits(...) with a low index one above the high
# index -- confirm bits() yields 0 for such a range.
# Instantiated for unsignedTypes as VqrshlUD and VqrshlUQ.
2372    vqrshlUCode = '''
2373        int16_t shiftAmt = (int8_t)srcElem2;
2374        FPSCR fpscr = (FPSCR) FpscrQc;
2375        if (shiftAmt < 0) {
2376            shiftAmt = -shiftAmt;
2377            Element rBit = 0;
2378            if (shiftAmt <= sizeof(Element) * 8)
2379                rBit = bits(srcElem1, shiftAmt - 1);
2380            if (shiftAmt >= sizeof(Element) * 8) {
2381                shiftAmt = sizeof(Element) * 8 - 1;
2382                destElem = 0;
2383            } else {
2384                destElem = (srcElem1 >> shiftAmt);
2385            }
2386            destElem += rBit;
2387        } else {
2388            if (shiftAmt >= sizeof(Element) * 8) {
2389                if (srcElem1 != 0) {
2390                    destElem = mask(sizeof(Element) * 8);
2391                    fpscr.qc = 1;
2392                } else {
2393                    destElem = 0;
2394                }
2395            } else {
2396                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2397                            sizeof(Element) * 8 - shiftAmt)) {
2398                    destElem = mask(sizeof(Element) * 8);
2399                    fpscr.qc = 1;
2400                } else {
2401                    destElem = srcElem1 << shiftAmt;
2402                }
2403            }
2404        }
2405        FpscrQc = fpscr;
2406    '''
2407    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2408    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2409
# Per-element C++ body for the signed register-shift "vqrshl" forms
# (the rounding counterpart of vqshlSCode).
# The shift amount is the low byte of srcElem2 read as a signed value.
#   * negative: right shift by the magnitude.  rBit is bit (shiftAmt - 1) of
#     the source while the magnitude does not exceed the element width, and
#     is forced to 1 when the magnitude exceeds the width and the source is
#     negative.  The shifted value (0 once the magnitude reaches the width)
#     gets the same sign-extension fix-up as vqshlSCode, then rBit is added.
#   * positive: left shift with the same saturation test as vqshlSCode; a
#     saturated result is mask(sizeof(Element) * 8 - 1), complemented for a
#     negative source, and fpscr.qc is set.
#   * zero:     srcElem1 is copied unchanged.
# Instantiated for signedTypes as VqrshlSD and VqrshlSQ.
2410    vqrshlSCode = '''
2411        int16_t shiftAmt = (int8_t)srcElem2;
2412        FPSCR fpscr = (FPSCR) FpscrQc;
2413        if (shiftAmt < 0) {
2414            shiftAmt = -shiftAmt;
2415            Element rBit = 0;
2416            if (shiftAmt <= sizeof(Element) * 8)
2417                rBit = bits(srcElem1, shiftAmt - 1);
2418            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2419                rBit = 1;
2420            if (shiftAmt >= sizeof(Element) * 8) {
2421                shiftAmt = sizeof(Element) * 8 - 1;
2422                destElem = 0;
2423            } else {
2424                destElem = (srcElem1 >> shiftAmt);
2425            }
2426            // Make sure the right shift sign extended when it should.
2427            if (srcElem1 < 0 && destElem >= 0) {
2428                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2429                                             1 - shiftAmt));
2430            }
2431            destElem += rBit;
2432        } else if (shiftAmt > 0) {
2433            bool sat = false;
2434            if (shiftAmt >= sizeof(Element) * 8) {
2435                if (srcElem1 != 0)
2436                    sat = true;
2437                else
2438                    destElem = 0;
2439            } else {
2440                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2441                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2442                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2443                    sat = true;
2444                } else {
2445                    destElem = srcElem1 << shiftAmt;
2446                }
2447            }
2448            if (sat) {
2449                fpscr.qc = 1;
2450                destElem = mask(sizeof(Element) * 8 - 1);
2451                if (srcElem1 < 0)
2452                    destElem = ~destElem;
2453            }
2454        } else {
2455            destElem = srcElem1;
2456        }
2457        FpscrQc = fpscr;
2458    '''
2459    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2460    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2461
# "vaba": adds the absolute difference of the two sources to destElem.  The
# larger operand is always the minuend, so the subtraction never goes
# negative.  The trailing True is passed to the helper for these
# accumulating forms (presumably "destination is also read" -- confirm
# against threeEqualRegInst).
2462    vabaCode = '''
2463        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2464                                            (srcElem2 - srcElem1);
2465    '''
2466    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2467    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# "vabal": same accumulation, but both operands are cast to BigElement
# before the subtraction.  Instantiated through threeRegLongInst for
# smallTypes.
2468    vabalCode = '''
2469        destElem += (srcElem1 > srcElem2) ?
2470            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2471            ((BigElement)srcElem2 - (BigElement)srcElem1);
2472    '''
2473    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2474
# "vabd": writes the absolute difference of the two sources to destElem by
# always subtracting the smaller operand from the larger one.
2475    vabdCode = '''
2476        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2477                                           (srcElem2 - srcElem1);
2478    '''
2479    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2480    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# "vabdl": same computation with both operands cast to BigElement first.
# Instantiated through threeRegLongInst for smallTypes.
2481    vabdlCode = '''
2482        destElem = (srcElem1 > srcElem2) ?
2483            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2484            ((BigElement)srcElem2 - (BigElement)srcElem1);
2485    '''
2486    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2487
# "vtst": destElem is (Element)(-1) when the two sources share at least one
# set bit (srcElem1 & srcElem2 is non-zero), otherwise 0.
2488    vtstCode = '''
2489        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2490    '''
2491    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2492    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2493
# "vmul": element-wise product of the two sources at Element width.
2494    vmulCode = '''
2495        destElem = srcElem1 * srcElem2;
2496    '''
2497    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2498    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# "vmull": both operands are cast to BigElement before multiplying, so the
# product is formed at the wider type.
2499    vmullCode = '''
2500        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2501    '''
2502    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2503
# "vmla": multiply-accumulate, destElem += srcElem1 * srcElem2 at Element
# width.  This string is reused by the two-register instantiations further
# down in this file.
2504    vmlaCode = '''
2505        destElem = destElem + srcElem1 * srcElem2;
2506    '''
2507    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2508    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# "vmlal": same accumulation with both operands cast to BigElement before
# the multiply.
2509    vmlalCode = '''
2510        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2511    '''
2512    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2513
# "vqdmlal": saturating doubling multiply-accumulate into a BigElement.
# Step 1: midElem is twice the product of the sources, computed in int64_t.
#         For three operand pairs (maxNeg * maxNeg, halfNeg * maxNeg and
#         maxNeg * halfNeg) midElem is replaced by
#         ~((BigElement)maxNeg << (sizeof(Element) * 8)) and fpscr.qc is set.
# Step 2: midElem is added to destElem.  Overflow is detected from signs:
#         if the old destElem and midElem had the same sign and the sum has
#         the other one, the result is replaced by
#         mask(sizeof(BigElement) * 8 - 1) (complemented when the old
#         destElem was negative) and fpscr.qc is set.
# NOTE(review): vqdmullCode below special-cases only maxNeg * maxNeg, and
# 2 * halfNeg * maxNeg appears to fit in a signed BigElement -- confirm the
# two halfNeg cases here are intended.
2514    vqdmlalCode = '''
2515        FPSCR fpscr = (FPSCR) FpscrQc;
2516        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2517        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2518        Element halfNeg = maxNeg / 2;
2519        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2520            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2521            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2522            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2523            fpscr.qc = 1;
2524        }
2525        bool negPreDest = ltz(destElem);
2526        destElem += midElem;
2527        bool negDest = ltz(destElem);
2528        bool negMid = ltz(midElem);
2529        if (negPreDest == negMid && negMid != negDest) {
2530            destElem = mask(sizeof(BigElement) * 8 - 1);
2531            if (negPreDest)
2532                destElem = ~destElem;
2533            fpscr.qc = 1;
2534        }
2535        FpscrQc = fpscr;
2536    '''
2537    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2538
# "vqdmlsl": saturating doubling multiply-subtract from a BigElement.
# Step 1: midElem is twice the product of the sources, computed in int64_t,
#         with the same three special-cased operand pairs as vqdmlalCode
#         (midElem replaced and fpscr.qc set).
# Step 2: midElem is subtracted from destElem.  posMid is the sign of
#         -midElem, i.e. of the value effectively added; if the old destElem
#         and that value had the same sign and the result has the other one,
#         the result is replaced by mask(sizeof(BigElement) * 8 - 1)
#         (complemented when the old destElem was negative) and fpscr.qc is
#         set.
# NOTE(review): vqdmullCode special-cases only maxNeg * maxNeg, and
# 2 * halfNeg * maxNeg appears to fit in a signed BigElement -- confirm the
# two halfNeg cases here are intended.
2539    vqdmlslCode = '''
2540        FPSCR fpscr = (FPSCR) FpscrQc;
2541        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2542        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2543        Element halfNeg = maxNeg / 2;
2544        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2545            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2546            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2547            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2548            fpscr.qc = 1;
2549        }
2550        bool negPreDest = ltz(destElem);
2551        destElem -= midElem;
2552        bool negDest = ltz(destElem);
2553        bool posMid = ltz((BigElement)-midElem);
2554        if (negPreDest == posMid && posMid != negDest) {
2555            destElem = mask(sizeof(BigElement) * 8 - 1);
2556            if (negPreDest)
2557                destElem = ~destElem;
2558            fpscr.qc = 1;
2559        }
2560        FpscrQc = fpscr;
2561    '''
2562    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2563
# "vqdmull": destElem is twice the product of the sources, computed in
# int64_t.  When both sources equal (Element)1 << (element width - 1) the
# result is replaced by ~((BigElement)srcElem1 << (sizeof(Element) * 8)) and
# fpscr.qc is set.  This string is reused by the two-register instantiation
# further down in this file.
2564    vqdmullCode = '''
2565        FPSCR fpscr = (FPSCR) FpscrQc;
2566        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2567        if (srcElem1 == srcElem2 &&
2568                srcElem1 == (Element)((Element)1 <<
2569                    (Element)(sizeof(Element) * 8 - 1))) {
2570            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2571            fpscr.qc = 1;
2572        }
2573        FpscrQc = fpscr;
2574    '''
2575    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2576
# "vmls": multiply-subtract, destElem -= srcElem1 * srcElem2 at Element
# width.  This string is reused by the two-register instantiations further
# down in this file.
2577    vmlsCode = '''
2578        destElem = destElem - srcElem1 * srcElem2;
2579    '''
2580    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2581    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# "vmlsl": same subtraction with both operands cast to BigElement before the
# multiply.
2582    vmlslCode = '''
2583        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2584    '''
2585    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2586
# XOR-accumulating (carry-less) multiply registered under the "vmul"
# mnemonic as NVmulpD / NVmulpQ: for every bit position j that bits(...)
# reports as set in srcElem2, srcElem1 << j is XORed into destElem (which
# starts at 0).
2587    vmulpCode = '''
2588        destElem = 0;
2589        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2590            if (bits(srcElem2, j))
2591                destElem ^= srcElem1 << j;
2592        }
2593    '''
2594    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2595    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# Long form of the same XOR-accumulating multiply: srcElem1 is cast to
# BigElement before each shift so no bits are lost at Element width.
2596    vmullpCode = '''
2597        destElem = 0;
2598        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2599            if (bits(srcElem2, j))
2600                destElem ^= (BigElement)srcElem1 << j;
2601        }
2602    '''
2603    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2604
# Integer "vpmax" / "vpmin": these reuse vmaxCode and vminCode (defined
# earlier in this file) and pass pairwise=True to the helper.  Only the
# D-named classes are instantiated here, for smallTypes.
2605    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2606
2607    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2608
# "vqdmulh": destElem is twice the product of the sources (computed in
# int64_t) shifted right by the element width, i.e. the upper part of the
# doubled product.  When both sources equal (Element)1 << (element width - 1)
# the result is replaced by ~srcElem1 and fpscr.qc is set.  This string is
# reused by the two-register instantiations further down in this file.
2609    vqdmulhCode = '''
2610        FPSCR fpscr = (FPSCR) FpscrQc;
2611        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2612                   (sizeof(Element) * 8);
2613        if (srcElem1 == srcElem2 &&
2614                srcElem1 == (Element)((Element)1 <<
2615                    (sizeof(Element) * 8 - 1))) {
2616            destElem = ~srcElem1;
2617            fpscr.qc = 1;
2618        }
2619        FpscrQc = fpscr;
2620    '''
2621    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2622    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2623
# "vqrdmulh": like vqdmulhCode but 1 << (element width - 1) is added to the
# doubled product before the right shift by the element width, which rounds
# the upper part.
# For three operand pairs (maxNeg * maxNeg, halfNeg * maxNeg and
# maxNeg * halfNeg) the result is overridden and fpscr.qc is set: a negative
# intermediate becomes mask(sizeof(Element) * 8 - 1), anything else becomes
# (Element)1 << (element width - 1).
# NOTE(review): vqdmulhCode special-cases only the pair where both operands
# are the most negative value; the rounded result for halfNeg * maxNeg looks
# representable in Element -- confirm the two halfNeg cases are intended.
2624    vqrdmulhCode = '''
2625        FPSCR fpscr = (FPSCR) FpscrQc;
2626        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2627                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2628                   (sizeof(Element) * 8);
2629        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2630        Element halfNeg = maxNeg / 2;
2631        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2632            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2633            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2634            if (destElem < 0) {
2635                destElem = mask(sizeof(Element) * 8 - 1);
2636            } else {
2637                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2638            }
2639            fpscr.qc = 1;
2640        }
2641        FpscrQc = fpscr;
2642    '''
2643    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2644            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2645    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2646            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2647
# Floating-point "vmax".  processNans() is called first and reports through
# "done" whether it already produced the result.  If not, the result comes
# from binaryOp() with fpMax<float>; if it did and flushToZero() on the two
# sources returns true, fpscr.idc is set.  The updated fpscr is written back
# to FpscrExc.  This string is reused by the pairwise FP "vpmax"
# instantiations below.
2648    vmaxfpCode = '''
2649        FPSCR fpscr = (FPSCR) FpscrExc;
2650        bool done;
2651        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2652        if (!done) {
2653            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2654                               true, true, VfpRoundNearest);
2655        } else if (flushToZero(srcReg1, srcReg2)) {
2656            fpscr.idc = 1;
2657        }
2658        FpscrExc = fpscr;
2659    '''
2660    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2661    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2662
# Floating-point "vmin".  Same structure as the FP max snippet: processNans()
# runs first and reports through "done" whether it already produced the
# result.  If not, the result comes from binaryOp() with fpMin<float>; if it
# did and flushToZero() on the two sources returns true, fpscr.idc is set.
# This string is reused by the pairwise FP "vpmin" instantiations below.
2663    vminfpCode = '''
2664        FPSCR fpscr = (FPSCR) FpscrExc;
2665        bool done;
2666        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2667        if (!done) {
2668            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2669                               true, true, VfpRoundNearest);
2670        } else if (flushToZero(srcReg1, srcReg2)) {
2671            fpscr.idc = 1;
2672        }
2673        FpscrExc = fpscr;
2674    '''
2675    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2676    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2677
# Floating-point "vpmax" / "vpmin": the FP max and min snippets defined just
# above are reused unchanged, with pairwise=True passed to the helper.
2678    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2679                        2, vmaxfpCode, pairwise=True)
2680    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2681                        4, vmaxfpCode, pairwise=True)
2682
2683    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2684                        2, vminfpCode, pairwise=True)
2685    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2686                        4, vminfpCode, pairwise=True)
2687
# Floating-point "vadd": a single binaryOp() call with fpAddS and
# VfpRoundNearest; fpscr is read from and written back to FpscrExc.
2688    vaddfpCode = '''
2689        FPSCR fpscr = (FPSCR) FpscrExc;
2690        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2691                           true, true, VfpRoundNearest);
2692        FpscrExc = fpscr;
2693    '''
2694    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2695    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2696
# Floating-point "vpadd" reuses the same add snippet with pairwise=True.
2697    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2698                        2, vaddfpCode, pairwise=True)
2699    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2700                        4, vaddfpCode, pairwise=True)
2701
# Floating-point "vsub": a single binaryOp() call with fpSubS and
# VfpRoundNearest; fpscr is read from and written back to FpscrExc.
2702    vsubfpCode = '''
2703        FPSCR fpscr = (FPSCR) FpscrExc;
2704        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2705                           true, true, VfpRoundNearest);
2706        FpscrExc = fpscr;
2707    '''
2708    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2709    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2710
# Floating-point "vmul": a single binaryOp() call with fpMulS and
# VfpRoundNearest; fpscr is read from and written back to FpscrExc.  This
# string is reused by the two-register FP instantiations further down in
# this file.
2711    vmulfpCode = '''
2712        FPSCR fpscr = (FPSCR) FpscrExc;
2713        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2714                           true, true, VfpRoundNearest);
2715        FpscrExc = fpscr;
2716    '''
2717    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2718    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2719
# Floating-point "vmla": done as two binaryOp() calls -- the product of the
# sources (fpMulS) is stored in "mid", then mid and destReg are added
# (fpAddS).  Both calls update the same fpscr, which is written back to
# FpscrExc.  This string is reused by the two-register FP instantiations
# further down in this file.
2720    vmlafpCode = '''
2721        FPSCR fpscr = (FPSCR) FpscrExc;
2722        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2723                             true, true, VfpRoundNearest);
2724        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2725                           true, true, VfpRoundNearest);
2726        FpscrExc = fpscr;
2727    '''
2728    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2729    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2730
# Floating-point "vfma": unlike the two-step "vmla" snippet, this is a single
# ternaryOp() call with fpMulAdd<float> over srcReg1, srcReg2 and destReg.
2731    vfmafpCode = '''
2732        FPSCR fpscr = (FPSCR) FpscrExc;
2733        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2734                            true, true, VfpRoundNearest);
2735        FpscrExc = fpscr;
2736    '''
2737    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2738    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2739
# Floating-point "vfms": a single ternaryOp() call with fpMulAdd<float>,
# where the first multiplicand is passed negated (-srcReg1) so the product is
# effectively subtracted from destReg.
2740    vfmsfpCode = '''
2741        FPSCR fpscr = (FPSCR) FpscrExc;
2742        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2743                            true, true, VfpRoundNearest);
2744        FpscrExc = fpscr;
2745    '''
2746    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2747    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2748
# Floating-point "vmls": done as two binaryOp() calls -- the product of the
# sources (fpMulS) is stored in "mid", then mid is subtracted from destReg
# (fpSubS, with destReg as the first operand).  This string is reused by the
# two-register FP instantiations further down in this file.
2749    vmlsfpCode = '''
2750        FPSCR fpscr = (FPSCR) FpscrExc;
2751        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2752                             true, true, VfpRoundNearest);
2753        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2754                           true, true, VfpRoundNearest);
2755        FpscrExc = fpscr;
2756    '''
2757    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2758    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2759
# Floating-point "vcgt".  binaryOp() is run with vcgtFunc and its float
# result is decoded here: a result of 0 writes -1 to destReg, anything else
# writes 0, and a result of 2.0 additionally sets fpscr.ioc.  What each
# return code stands for is defined by vcgtFunc, which is not part of this
# section.  toInt = True is passed to the helper.
2760    vcgtfpCode = '''
2761        FPSCR fpscr = (FPSCR) FpscrExc;
2762        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2763                             true, true, VfpRoundNearest);
2764        destReg = (res == 0) ? -1 : 0;
2765        if (res == 2.0)
2766            fpscr.ioc = 1;
2767        FpscrExc = fpscr;
2768    '''
2769    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2770            2, vcgtfpCode, toInt = True)
2771    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2772            4, vcgtfpCode, toInt = True)
2773
# Floating-point "vcge".  binaryOp() is run with vcgeFunc and its float
# result is decoded here: a result of 0 writes -1 to destReg, anything else
# writes 0, and a result of 2.0 additionally sets fpscr.ioc.  What each
# return code stands for is defined by vcgeFunc, which is not part of this
# section.  toInt = True is passed to the helper.
2774    vcgefpCode = '''
2775        FPSCR fpscr = (FPSCR) FpscrExc;
2776        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2777                             true, true, VfpRoundNearest);
2778        destReg = (res == 0) ? -1 : 0;
2779        if (res == 2.0)
2780            fpscr.ioc = 1;
2781        FpscrExc = fpscr;
2782    '''
2783    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2784            2, vcgefpCode, toInt = True)
2785    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2786            4, vcgefpCode, toInt = True)
2787
# Floating-point "vacgt".  binaryOp() is run with vacgtFunc and its float
# result is decoded here: a result of 0 writes -1 to destReg, anything else
# writes 0, and a result of 2.0 additionally sets fpscr.ioc.  What each
# return code stands for is defined by vacgtFunc, which is not part of this
# section.  toInt = True is passed to the helper.
2788    vacgtfpCode = '''
2789        FPSCR fpscr = (FPSCR) FpscrExc;
2790        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2791                             true, true, VfpRoundNearest);
2792        destReg = (res == 0) ? -1 : 0;
2793        if (res == 2.0)
2794            fpscr.ioc = 1;
2795        FpscrExc = fpscr;
2796    '''
2797    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2798            2, vacgtfpCode, toInt = True)
2799    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2800            4, vacgtfpCode, toInt = True)
2801
# Floating-point "vacge".  binaryOp() is run with vacgeFunc and its float
# result is decoded here: a result of 0 writes -1 to destReg, anything else
# writes 0, and a result of 2.0 additionally sets fpscr.ioc.  What each
# return code stands for is defined by vacgeFunc, which is not part of this
# section.  toInt = True is passed to the helper.
2802    vacgefpCode = '''
2803        FPSCR fpscr = (FPSCR) FpscrExc;
2804        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2805                             true, true, VfpRoundNearest);
2806        destReg = (res == 0) ? -1 : 0;
2807        if (res == 2.0)
2808            fpscr.ioc = 1;
2809        FpscrExc = fpscr;
2810    '''
2811    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2812            2, vacgefpCode, toInt = True)
2813    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2814            4, vacgefpCode, toInt = True)
2815
# Floating-point "vceq".  binaryOp() is run with vceqFunc and its float
# result is decoded here: a result of 0 writes -1 to destReg, anything else
# writes 0, and a result of 2.0 additionally sets fpscr.ioc.  What each
# return code stands for is defined by vceqFunc, which is not part of this
# section.  toInt = True is passed to the helper.
2816    vceqfpCode = '''
2817        FPSCR fpscr = (FPSCR) FpscrExc;
2818        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2819                             true, true, VfpRoundNearest);
2820        destReg = (res == 0) ? -1 : 0;
2821        if (res == 2.0)
2822            fpscr.ioc = 1;
2823        FpscrExc = fpscr;
2824    '''
2825    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2826            2, vceqfpCode, toInt = True)
2827    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2828            4, vceqfpCode, toInt = True)
2829
# "vrecps": a single binaryOp() call that delegates the arithmetic to
# fpRecpsS (defined outside this section); fpscr is read from and written
# back to FpscrExc.
2830    vrecpsCode = '''
2831        FPSCR fpscr = (FPSCR) FpscrExc;
2832        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2833                           true, true, VfpRoundNearest);
2834        FpscrExc = fpscr;
2835    '''
2836    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2837    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2838
# "vrsqrts": a single binaryOp() call that delegates the arithmetic to
# fpRSqrtsS (defined outside this section); fpscr is read from and written
# back to FpscrExc.
2839    vrsqrtsCode = '''
2840        FPSCR fpscr = (FPSCR) FpscrExc;
2841        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2842                           true, true, VfpRoundNearest);
2843        FpscrExc = fpscr;
2844    '''
2845    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2846    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2847
# Floating-point "vabd": the difference of the sources is computed with
# binaryOp()/fpSubS into "mid", and destReg receives fabs(mid).
2848    vabdfpCode = '''
2849        FPSCR fpscr = (FPSCR) FpscrExc;
2850        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2851                             true, true, VfpRoundNearest);
2852        destReg = fabs(mid);
2853        FpscrExc = fpscr;
2854    '''
2855    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2856    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2857
# Second set of instantiations that reuse the snippets defined above through
# the two* helpers (twoEqualRegInst, twoEqualRegInstFp, twoRegLongInst).  The
# class names carry an extra "s" (e.g. VmlasD); these are presumably the
# by-scalar encodings -- confirm against the helper definitions.
#
# "vmla" / "vmlal" variants.
# NOTE(review): VmlasD/VmlasQ are instantiated for unsignedTypes while the
# matching VmlssD/Q and VmulsD/Q below use allTypes -- confirm this is
# intentional.
2858    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2859    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2860    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2861    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2862    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2863
# "vmls" / "vmlsl" variants.
2864    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2865    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2866    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2867    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2868    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2869
# "vmul" / "vmull" variants.
2870    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2871    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2872    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2873    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2874    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2875
# Saturating doubling multiply variants (vqdmull, vqdmlal, vqdmlsl, vqdmulh,
# vqrdmulh).
2876    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2877    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2878    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2879    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2880    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2881    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2882            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2883    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2884            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2885
# Immediate "vshr": shifts srcElem1 right by imm.  When imm is at least the
# operand width the shift is not performed; the result is -1 if ltz()
# reports the source as negative and 0 otherwise.
2886    vshrCode = '''
2887        if (imm >= sizeof(srcElem1) * 8) {
2888            if (ltz(srcElem1))
2889                destElem = -1;
2890            else
2891                destElem = 0;
2892        } else {
2893            destElem = srcElem1 >> imm;
2894        }
2895    '''
2896    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2897    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2898
2899    vsraCode = '''
2900        Element mid;;
2901        if (imm >= sizeof(srcElem1) * 8) {
2902            mid = ltz(srcElem1) ? -1 : 0;
2903        } else {
2904            mid = srcElem1 >> imm;
2905            if (ltz(srcElem1) && !ltz(mid)) {
2906                mid |= -(mid & ((Element)1 <<
2907                            (sizeof(Element) * 8 - 1 - imm)));
2908            }
2909        }
2910        destElem += mid;
2911    '''
2912    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2913    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2914
# Immediate "vrshr": rounding shift right.
#   * imm greater than the operand width: result is 0.
#   * non-zero imm: rBit is bit (imm - 1) of the source and is added to the
#     shifted value.  The shift is split into (imm - 1) followed by 1 so that
#     neither step shifts by the full operand width when imm equals it.
#   * imm of zero: srcElem1 is copied unchanged.
2915    vrshrCode = '''
2916        if (imm > sizeof(srcElem1) * 8) {
2917            destElem = 0;
2918        } else if (imm) {
2919            Element rBit = bits(srcElem1, imm - 1);
2920            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2921        } else {
2922            destElem = srcElem1;
2923        }
2924    '''
2925    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2926    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2927
# Immediate "vrsra": rounding shift right and accumulate.  The value added
# to destElem is computed the same way as in the "vrshr" snippet:
#   * imm greater than the operand width: adds 0 (destElem is still written);
#   * non-zero imm: adds ((srcElem1 >> (imm - 1)) >> 1) plus rBit, where rBit
#     is bit (imm - 1) of the source;
#   * imm of zero: adds srcElem1 itself.
2928    vrsraCode = '''
2929        if (imm > sizeof(srcElem1) * 8) {
2930            destElem += 0;
2931        } else if (imm) {
2932            Element rBit = bits(srcElem1, imm - 1);
2933            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2934        } else {
2935            destElem += srcElem1;
2936        }
2937    '''
2938    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2939    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2940
# Immediate "vsri": shift right and insert.  srcElem1 >> imm is ORed with
# the part of the old destElem selected by
# ~mask(sizeof(Element) * 8 - imm) (the bits the shifted value does not
# cover, assuming mask(n) yields the n low-order bits -- confirm).  When imm
# is at least the element width destElem is assigned to itself, i.e. left
# unchanged but still written.
2941    vsriCode = '''
2942        if (imm >= sizeof(Element) * 8)
2943            destElem = destElem;
2944        else
2945            destElem = (srcElem1 >> imm) |
2946                (destElem & ~mask(sizeof(Element) * 8 - imm));
2947    '''
2948    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2949    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2950
# Immediate "vshl": shifts srcElem1 left by imm.  When imm is at least the
# element width the shift is performed as (width - 1) followed by 1 instead
# of a single shift by imm, so no single step shifts by the full width.
2951    vshlCode = '''
2952        if (imm >= sizeof(Element) * 8)
2953            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2954        else
2955            destElem = srcElem1 << imm;
2956    '''
2957    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2958    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2959
# Immediate "vsli": shift left and insert.  srcElem1 << imm is ORed with
# destElem & mask(imm), i.e. the part of the old destination selected by
# mask(imm) is kept.  When imm is at least the element width destElem is
# assigned to itself, i.e. left unchanged but still written.
2960    vsliCode = '''
2961        if (imm >= sizeof(Element) * 8)
2962            destElem = destElem;
2963        else
2964            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2965    '''
2966    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2967    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2968
# Immediate signed "vqshl": saturating shift left by imm.
#   * imm at least the element width: a non-zero source saturates, a zero
#     source gives 0.
#   * non-zero imm: the shifted value is kept unless the top imm + 1 bits of
#     the source (read through bits(...)) are neither all zero nor equal to
#     mask(imm + 1), in which case the result saturates.
#   * imm of zero: srcElem1 is copied unchanged.
# A saturated result is (Element)1 << (element width - 1), complemented when
# the source is positive, and fpscr.qc is set.
# Instantiated for signedTypes as NVqshlD and NVqshlQ.
2969    vqshlCode = '''
2970        FPSCR fpscr = (FPSCR) FpscrQc;
2971        if (imm >= sizeof(Element) * 8) {
2972            if (srcElem1 != 0) {
2973                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2974                if (srcElem1 > 0)
2975                    destElem = ~destElem;
2976                fpscr.qc = 1;
2977            } else {
2978                destElem = 0;
2979            }
2980        } else if (imm) {
2981            destElem = (srcElem1 << imm);
2982            uint64_t topBits = bits((uint64_t)srcElem1,
2983                                    sizeof(Element) * 8 - 1,
2984                                    sizeof(Element) * 8 - 1 - imm);
2985            if (topBits != 0 && topBits != mask(imm + 1)) {
2986                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2987                if (srcElem1 > 0)
2988                    destElem = ~destElem;
2989                fpscr.qc = 1;
2990            }
2991        } else {
2992            destElem = srcElem1;
2993        }
2994        FpscrQc = fpscr;
2995    '''
2996    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2997    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2998
# Immediate unsigned saturating shift left, registered under the "vqshlu"
# mnemonic for unsignedTypes.
#   * imm at least the element width: a non-zero source gives
#     mask(sizeof(Element) * 8) and sets fpscr.qc; a zero source gives 0.
#   * non-zero imm: the shifted value is kept unless the top imm bits of the
#     source (read through bits(...)) are non-zero, in which case the result
#     is mask(sizeof(Element) * 8) and fpscr.qc is set.
#   * imm of zero: srcElem1 is copied unchanged.
2999    vqshluCode = '''
3000        FPSCR fpscr = (FPSCR) FpscrQc;
3001        if (imm >= sizeof(Element) * 8) {
3002            if (srcElem1 != 0) {
3003                destElem = mask(sizeof(Element) * 8);
3004                fpscr.qc = 1;
3005            } else {
3006                destElem = 0;
3007            }
3008        } else if (imm) {
3009            destElem = (srcElem1 << imm);
3010            uint64_t topBits = bits((uint64_t)srcElem1,
3011                                    sizeof(Element) * 8 - 1,
3012                                    sizeof(Element) * 8 - imm);
3013            if (topBits != 0) {
3014                destElem = mask(sizeof(Element) * 8);
3015                fpscr.qc = 1;
3016            }
3017        } else {
3018            destElem = srcElem1;
3019        }
3020        FpscrQc = fpscr;
3021    '''
3022    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3023    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3024
# Immediate saturating shift left registered under the "vqshlus" mnemonic
# for signedTypes.  In every branch a negative source produces 0 and sets
# fpscr.qc.  For non-negative sources:
#   * imm at least the element width: a positive source gives
#     mask(sizeof(Element) * 8) and sets fpscr.qc; zero gives 0.
#   * non-zero imm: the shifted value is kept unless the top imm bits of the
#     source (read through bits(...)) are non-zero, in which case the result
#     is mask(sizeof(Element) * 8) and fpscr.qc is set.
#   * imm of zero: srcElem1 is copied unchanged.
3025    vqshlusCode = '''
3026        FPSCR fpscr = (FPSCR) FpscrQc;
3027        if (imm >= sizeof(Element) * 8) {
3028            if (srcElem1 < 0) {
3029                destElem = 0;
3030                fpscr.qc = 1;
3031            } else if (srcElem1 > 0) {
3032                destElem = mask(sizeof(Element) * 8);
3033                fpscr.qc = 1;
3034            } else {
3035                destElem = 0;
3036            }
3037        } else if (imm) {
3038            destElem = (srcElem1 << imm);
3039            uint64_t topBits = bits((uint64_t)srcElem1,
3040                                    sizeof(Element) * 8 - 1,
3041                                    sizeof(Element) * 8 - imm);
3042            if (srcElem1 < 0) {
3043                destElem = 0;
3044                fpscr.qc = 1;
3045            } else if (topBits != 0) {
3046                destElem = mask(sizeof(Element) * 8);
3047                fpscr.qc = 1;
3048            }
3049        } else {
3050            if (srcElem1 < 0) {
3051                fpscr.qc = 1;
3052                destElem = 0;
3053            } else {
3054                destElem = srcElem1;
3055            }
3056        }
3057        FpscrQc = fpscr;
3058    '''
3059    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3060    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3061
    # Per-element C++ body for a right shift by imm: the result is 0 when
    # imm is at least the bit width of srcElem1, otherwise srcElem1 >> imm.
    # Registered through twoRegNarrowShiftInst for smallUnsignedTypes as
    # NVshrn.
3062    vshrnCode = '''
3063        if (imm >= sizeof(srcElem1) * 8) {
3064            destElem = 0;
3065        } else {
3066            destElem = srcElem1 >> imm;
3067        }
3068    '''
3069    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3070
    # Per-element C++ body for a rounding right shift by imm. rBit is bit
    # (imm - 1) of the source, i.e. the last bit shifted out, and is added
    # to the shifted value. The shift is written as (>> (imm - 1)) >> 1 so
    # that each individual shift count stays below the source width even
    # when imm equals that width. imm larger than the width gives 0 and
    # imm == 0 copies the source.
    # Registered through twoRegNarrowShiftInst for smallUnsignedTypes as
    # NVrshrn.
3071    vrshrnCode = '''
3072        if (imm > sizeof(srcElem1) * 8) {
3073            destElem = 0;
3074        } else if (imm) {
3075            Element rBit = bits(srcElem1, imm - 1);
3076            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3077        } else {
3078            destElem = srcElem1;
3079        }
3080    '''
3081    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3082
    # Per-element C++ body for a saturating right shift by imm.
    # - imm > source width: result 0; fpscr.qc is set unless the source is
    #   0 or -1.
    # - imm != 0: mid is the shifted value; the "mid |= -(mid & bit)" step
    #   sets every bit from position (BigElement width - 1 - imm) upward
    #   when that bit is set, i.e. it sign-extends the shifted value. If
    #   mid does not survive a round trip through Element, the result
    #   saturates to mask(width - 1), complemented for a negative source,
    #   and qc is set.
    # - imm == 0: the source is copied.
    # Registered through twoRegNarrowShiftInst for smallSignedTypes as
    # NVqshrn.
3083    vqshrnCode = '''
3084        FPSCR fpscr = (FPSCR) FpscrQc;
3085        if (imm > sizeof(srcElem1) * 8) {
3086            if (srcElem1 != 0 && srcElem1 != -1)
3087                fpscr.qc = 1;
3088            destElem = 0;
3089        } else if (imm) {
3090            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3091            mid |= -(mid & ((BigElement)1 <<
3092                        (sizeof(BigElement) * 8 - 1 - imm)));
3093            if (mid != (Element)mid) {
3094                destElem = mask(sizeof(Element) * 8 - 1);
3095                if (srcElem1 < 0)
3096                    destElem = ~destElem;
3097                fpscr.qc = 1;
3098            } else {
3099                destElem = mid;
3100            }
3101        } else {
3102            destElem = srcElem1;
3103        }
3104        FpscrQc = fpscr;
3105    '''
3106    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3107
    # Per-element C++ body for a saturating right shift by imm with an
    # all-ones saturation value.
    # - imm > source width: result 0; fpscr.qc is set for a non-zero source.
    # - imm != 0: mid is the shifted value; if it does not survive a round
    #   trip through Element the result becomes all ones and qc is set.
    # - imm == 0: the source is copied without a saturation check.
    # Registered through twoRegNarrowShiftInst for smallUnsignedTypes as
    # NVqshrun.
3108    vqshrunCode = '''
3109        FPSCR fpscr = (FPSCR) FpscrQc;
3110        if (imm > sizeof(srcElem1) * 8) {
3111            if (srcElem1 != 0)
3112                fpscr.qc = 1;
3113            destElem = 0;
3114        } else if (imm) {
3115            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3116            if (mid != (Element)mid) {
3117                destElem = mask(sizeof(Element) * 8);
3118                fpscr.qc = 1;
3119            } else {
3120                destElem = mid;
3121            }
3122        } else {
3123            destElem = srcElem1;
3124        }
3125        FpscrQc = fpscr;
3126    '''
3127    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3128                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3129
    # Per-element C++ body for a saturating right shift by imm where the
    # source is tested as signed and the result saturates to [0, all ones].
    # - imm > source width: result 0; fpscr.qc is set for a non-zero source.
    # - imm != 0: mid is the shifted value; if any bit of mid above the
    #   Element width is set, the result is 0 for a negative source and all
    #   ones otherwise, and qc is set.
    # - imm == 0: the source is copied.
    # Registered through twoRegNarrowShiftInst for smallSignedTypes; note
    # that the mnemonic is "vqshrun" while the class name is NVqshruns.
3130    vqshrunsCode = '''
3131        FPSCR fpscr = (FPSCR) FpscrQc;
3132        if (imm > sizeof(srcElem1) * 8) {
3133            if (srcElem1 != 0)
3134                fpscr.qc = 1;
3135            destElem = 0;
3136        } else if (imm) {
3137            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3138            if (bits(mid, sizeof(BigElement) * 8 - 1,
3139                          sizeof(Element) * 8) != 0) {
3140                if (srcElem1 < 0) {
3141                    destElem = 0;
3142                } else {
3143                    destElem = mask(sizeof(Element) * 8);
3144                }
3145                fpscr.qc = 1;
3146            } else {
3147                destElem = mid;
3148            }
3149        } else {
3150            destElem = srcElem1;
3151        }
3152        FpscrQc = fpscr;
3153    '''
3154    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3155                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3156
    # Per-element C++ body for a saturating, rounding right shift by imm.
    # - imm > source width: result 0; fpscr.qc is set unless the source is
    #   0 or -1.
    # - imm != 0: the source is shifted by (imm - 1), rBit captures the low
    #   bit of that intermediate (the last bit to be shifted out), the final
    #   one-bit shift is applied, the value is sign-extended with the
    #   "mid |= -(mid & bit)" step, and rBit is added. If the sum does not
    #   survive a round trip through Element, the result saturates to
    #   mask(width - 1), complemented for a negative source, and qc is set.
    # - imm == 0: the same saturation check is applied to the source itself.
    # Registered through twoRegNarrowShiftInst for smallSignedTypes as
    # NVqrshrn.
3157    vqrshrnCode = '''
3158        FPSCR fpscr = (FPSCR) FpscrQc;
3159        if (imm > sizeof(srcElem1) * 8) {
3160            if (srcElem1 != 0 && srcElem1 != -1)
3161                fpscr.qc = 1;
3162            destElem = 0;
3163        } else if (imm) {
3164            BigElement mid = (srcElem1 >> (imm - 1));
3165            uint64_t rBit = mid & 0x1;
3166            mid >>= 1;
3167            mid |= -(mid & ((BigElement)1 <<
3168                        (sizeof(BigElement) * 8 - 1 - imm)));
3169            mid += rBit;
3170            if (mid != (Element)mid) {
3171                destElem = mask(sizeof(Element) * 8 - 1);
3172                if (srcElem1 < 0)
3173                    destElem = ~destElem;
3174                fpscr.qc = 1;
3175            } else {
3176                destElem = mid;
3177            }
3178        } else {
3179            if (srcElem1 != (Element)srcElem1) {
3180                destElem = mask(sizeof(Element) * 8 - 1);
3181                if (srcElem1 < 0)
3182                    destElem = ~destElem;
3183                fpscr.qc = 1;
3184            } else {
3185                destElem = srcElem1;
3186            }
3187        }
3188        FpscrQc = fpscr;
3189    '''
3190    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3191                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3192
3193    vqrshrunCode = '''
3194        FPSCR fpscr = (FPSCR) FpscrQc;
3195        if (imm > sizeof(srcElem1) * 8) {
3196            if (srcElem1 != 0)
3197                fpscr.qc = 1;
3198            destElem = 0;
3199        } else if (imm) {
3200            BigElement mid = (srcElem1 >> (imm - 1));
3201            uint64_t rBit = mid & 0x1;
3202            mid >>= 1;
3203            mid += rBit;
3204            if (mid != (Element)mid) {
3205                destElem = mask(sizeof(Element) * 8);
3206                fpscr.qc = 1;
3207            } else {
3208                destElem = mid;
3209            }
3210        } else {
3211            if (srcElem1 != (Element)srcElem1) {
3212                destElem = mask(sizeof(Element) * 8 - 1);
3213                fpscr.qc = 1;
3214            } else {
3215                destElem = srcElem1;
3216            }
3217        }
3218        FpscrQc = fpscr;
3219    '''
3220    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3221                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3222
    # Per-element C++ body for a saturating, rounding right shift by imm
    # where the source is tested as signed and the result saturates to
    # [0, all ones].
    # - imm > source width: result 0; fpscr.qc is set for a non-zero source.
    # - imm != 0: shift by (imm - 1), capture the low bit in rBit, apply the
    #   last one-bit shift, sign-extend with the "mid |= -(mid & bit)" step
    #   and add rBit. If any bit of mid above the Element width is set, the
    #   result is 0 for a negative source and all ones otherwise, with qc set.
    # - imm == 0: a negative source gives 0 with qc set; otherwise the
    #   source is copied.
    # Registered through twoRegNarrowShiftInst for smallSignedTypes; the
    # mnemonic is "vqrshrun" while the class name is NVqrshruns.
3223    vqrshrunsCode = '''
3224        FPSCR fpscr = (FPSCR) FpscrQc;
3225        if (imm > sizeof(srcElem1) * 8) {
3226            if (srcElem1 != 0)
3227                fpscr.qc = 1;
3228            destElem = 0;
3229        } else if (imm) {
3230            BigElement mid = (srcElem1 >> (imm - 1));
3231            uint64_t rBit = mid & 0x1;
3232            mid >>= 1;
3233            mid |= -(mid & ((BigElement)1 <<
3234                            (sizeof(BigElement) * 8 - 1 - imm)));
3235            mid += rBit;
3236            if (bits(mid, sizeof(BigElement) * 8 - 1,
3237                          sizeof(Element) * 8) != 0) {
3238                if (srcElem1 < 0) {
3239                    destElem = 0;
3240                } else {
3241                    destElem = mask(sizeof(Element) * 8);
3242                }
3243                fpscr.qc = 1;
3244            } else {
3245                destElem = mid;
3246            }
3247        } else {
3248            if (srcElem1 < 0) {
3249                fpscr.qc = 1;
3250                destElem = 0;
3251            } else {
3252                destElem = srcElem1;
3253            }
3254        }
3255        FpscrQc = fpscr;
3256    '''
3257    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3258                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3259
    # Per-element C++ body for a left shift by imm: the source is cast to
    # BigElement before shifting; the result is 0 when imm is at least the
    # bit width of destElem.
    # Registered through twoRegLongShiftInst for smallTypes as NVshll.
3260    vshllCode = '''
3261        if (imm >= sizeof(destElem) * 8) {
3262            destElem = 0;
3263        } else {
3264            destElem = (BigElement)srcElem1 << imm;
3265        }
3266    '''
3267    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3268
    # Per-element C++ body that assigns the source element to the
    # destination element unchanged. Registered through twoRegLongShiftInst
    # (op class "SimdMiscOp") for smallTypes as NVmovl.
3269    vmovlCode = '''
3270        destElem = srcElem1;
3271    '''
3272    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3273
    # C++ body converting a float element to fixed point through
    # vfpFpToFixed<float>(srcElem1, false, 32, imm). It sets fpscr.idc when
    # flushToZero(srcElem1) returns true and brackets the conversion with
    # prepFpState(VfpRoundNearest) / finishVfp, writing the result to
    # FpscrExc.
    # NOTE(review): the empty asm statements with "m" constraints presumably
    # keep the compiler from moving the conversion across the FP state
    # changes, and the `false` argument presumably selects the unsigned
    # conversion (the sibling vcvt2sfxCode passes `true`) — confirm against
    # the helper definitions.
    # Registered through twoRegShiftInst with toInt = True for ("float",)
    # as NVcvt2ufxD and NVcvt2ufxQ.
3274    vcvt2ufxCode = '''
3275        FPSCR fpscr = (FPSCR) FpscrExc;
3276        if (flushToZero(srcElem1))
3277            fpscr.idc = 1;
3278        VfpSavedState state = prepFpState(VfpRoundNearest);
3279        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3280        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3281        __asm__ __volatile__("" :: "m" (destReg));
3282        finishVfp(fpscr, state, true);
3283        FpscrExc = fpscr;
3284    '''
3285    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3286            2, vcvt2ufxCode, toInt = True)
3287    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3288            4, vcvt2ufxCode, toInt = True)
3289
    # C++ body converting a float element to fixed point through
    # vfpFpToFixed<float>(srcElem1, true, 32, imm). It sets fpscr.idc when
    # flushToZero(srcElem1) returns true and brackets the conversion with
    # prepFpState(VfpRoundNearest) / finishVfp, writing the result to
    # FpscrExc.
    # NOTE(review): the `true` argument presumably selects the signed
    # conversion (the sibling vcvt2ufxCode passes `false`) — confirm
    # against vfpFpToFixed.
    # Registered through twoRegShiftInst with toInt = True for ("float",)
    # as NVcvt2sfxD and NVcvt2sfxQ.
3290    vcvt2sfxCode = '''
3291        FPSCR fpscr = (FPSCR) FpscrExc;
3292        if (flushToZero(srcElem1))
3293            fpscr.idc = 1;
3294        VfpSavedState state = prepFpState(VfpRoundNearest);
3295        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3296        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3297        __asm__ __volatile__("" :: "m" (destReg));
3298        finishVfp(fpscr, state, true);
3299        FpscrExc = fpscr;
3300    '''
3301    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3302            2, vcvt2sfxCode, toInt = True)
3303    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3304            4, vcvt2sfxCode, toInt = True)
3305
    # C++ body converting a fixed-point register value to float through
    # vfpUFixedToFpS(true, true, srcReg1, 32, imm), bracketed by
    # prepFpState(VfpRoundNearest) / finishVfp; the updated flags are
    # written to FpscrExc.
    # Registered through twoRegShiftInst with fromInt = True for ("float",)
    # as NVcvtu2fpD and NVcvtu2fpQ.
3306    vcvtu2fpCode = '''
3307        FPSCR fpscr = (FPSCR) FpscrExc;
3308        VfpSavedState state = prepFpState(VfpRoundNearest);
3309        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3310        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3311        __asm__ __volatile__("" :: "m" (destElem));
3312        finishVfp(fpscr, state, true);
3313        FpscrExc = fpscr;
3314    '''
3315    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3316            2, vcvtu2fpCode, fromInt = True)
3317    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3318            4, vcvtu2fpCode, fromInt = True)
3319
    # C++ body converting a fixed-point register value to float through
    # vfpSFixedToFpS(true, true, srcReg1, 32, imm), bracketed by
    # prepFpState(VfpRoundNearest) / finishVfp; the updated flags are
    # written to FpscrExc.
    # Registered through twoRegShiftInst with fromInt = True for ("float",)
    # as NVcvts2fpD and NVcvts2fpQ.
3320    vcvts2fpCode = '''
3321        FPSCR fpscr = (FPSCR) FpscrExc;
3322        VfpSavedState state = prepFpState(VfpRoundNearest);
3323        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3324        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3325        __asm__ __volatile__("" :: "m" (destElem));
3326        finishVfp(fpscr, state, true);
3327        FpscrExc = fpscr;
3328    '''
3329    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3330            2, vcvts2fpCode, fromInt = True)
3331    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3332            4, vcvts2fpCode, fromInt = True)
3333
    # C++ body for the single-to-half conversion: the source bits are
    # reinterpreted as a float with bitsToFp, fpscr.idc is set when
    # flushToZero(srcFp1) returns true, and the result comes from
    # vcvtFpSFpH(fpscr, true, true, VfpRoundNearest, fpscr.ahp, srcFp1)
    # between prepFpState / finishVfp. destElem is zeroed first.
    # Registered through twoRegNarrowMiscInst for ("uint16_t",) as NVcvts2h.
3334    vcvts2hCode = '''
3335        destElem = 0;
3336        FPSCR fpscr = (FPSCR) FpscrExc;
3337        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3338        if (flushToZero(srcFp1))
3339            fpscr.idc = 1;
3340        VfpSavedState state = prepFpState(VfpRoundNearest);
3341        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3342                                : "m" (srcFp1), "m" (destElem));
3343        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3344                              fpscr.ahp, srcFp1);
3345        __asm__ __volatile__("" :: "m" (destElem));
3346        finishVfp(fpscr, state, true);
3347        FpscrExc = fpscr;
3348    '''
3349    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3350
    # C++ body for the half-to-single conversion: the result is
    # fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)), computed
    # between prepFpState(VfpRoundNearest) / finishVfp, with the updated
    # flags written to FpscrExc. destElem is zeroed first.
    # Registered through twoRegLongMiscInst for ("uint16_t",) as NVcvth2s.
3351    vcvth2sCode = '''
3352        destElem = 0;
3353        FPSCR fpscr = (FPSCR) FpscrExc;
3354        VfpSavedState state = prepFpState(VfpRoundNearest);
3355        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3356                                : "m" (srcElem1), "m" (destElem));
3357        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3358        __asm__ __volatile__("" :: "m" (destElem));
3359        finishVfp(fpscr, state, true);
3360        FpscrExc = fpscr;
3361    '''
3362    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3363
    # Per-element C++ body that delegates to unsignedRSqrtEstimate().
    # Registered through twoRegMiscInst for ("uint32_t",) as NVrsqrteD and
    # NVrsqrteQ.
3364    vrsqrteCode = '''
3365        destElem = unsignedRSqrtEstimate(srcElem1);
3366    '''
3367    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3368    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3369
    # C++ body for the floating-point variant: sets fpscr.idc when
    # flushToZero(srcReg1) returns true, then assigns
    # fprSqrtEstimate(fpscr, srcReg1) to destReg and writes fpscr back to
    # FpscrExc.
    # Registered through twoRegMiscInstFp for ("float",) as NVrsqrteDFp and
    # NVrsqrteQFp.
3370    vrsqrtefpCode = '''
3371        FPSCR fpscr = (FPSCR) FpscrExc;
3372        if (flushToZero(srcReg1))
3373            fpscr.idc = 1;
3374        destReg = fprSqrtEstimate(fpscr, srcReg1);
3375        FpscrExc = fpscr;
3376    '''
3377    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3378    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3379
    # Per-element C++ body that delegates to unsignedRecipEstimate().
    # Registered through twoRegMiscInst (op class "SimdMultAccOp") for
    # ("uint32_t",) as NVrecpeD and NVrecpeQ.
3380    vrecpeCode = '''
3381        destElem = unsignedRecipEstimate(srcElem1);
3382    '''
3383    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3384    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3385
    # C++ body for the floating-point variant: sets fpscr.idc when
    # flushToZero(srcReg1) returns true, then assigns
    # fpRecipEstimate(fpscr, srcReg1) to destReg and writes fpscr back to
    # FpscrExc.
    # Registered through twoRegMiscInstFp for ("float",) as NVrecpeDFp and
    # NVrecpeQFp.
3386    vrecpefpCode = '''
3387        FPSCR fpscr = (FPSCR) FpscrExc;
3388        if (flushToZero(srcReg1))
3389            fpscr.idc = 1;
3390        destReg = fpRecipEstimate(fpscr, srcReg1);
3391        FpscrExc = fpscr;
3392    '''
3393    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3394    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3395
    # The three vrevN snippets share one shape: groupSize is the number of
    # elements in a group of (1 << k) bytes (k = 1, 2, 3 for 16, 32 and 64
    # bits), and j is set to i with its low log2(groupSize) bits flipped
    # (i ^ (groupSize - 1)), which mirrors an index within its group.
    # NOTE(review): i and j are not declared here; presumably the
    # twoRegMiscInst template walks source index i and stores destElem at
    # index j — confirm against that helper.
    # vrev16 is registered for uint8_t only, vrev32 for uint8_t and
    # uint16_t, and vrev64 for smallUnsignedTypes.
3396    vrev16Code = '''
3397        destElem = srcElem1;
3398        unsigned groupSize = ((1 << 1) / sizeof(Element));
3399        unsigned reverseMask = (groupSize - 1);
3400        j = i ^ reverseMask;
3401    '''
3402    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3403    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3404    vrev32Code = '''
3405        destElem = srcElem1;
3406        unsigned groupSize = ((1 << 2) / sizeof(Element));
3407        unsigned reverseMask = (groupSize - 1);
3408        j = i ^ reverseMask;
3409    '''
3410    twoRegMiscInst("vrev32", "NVrev32D",
3411            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3412    twoRegMiscInst("vrev32", "NVrev32Q",
3413            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3414    vrev64Code = '''
3415        destElem = srcElem1;
3416        unsigned groupSize = ((1 << 3) / sizeof(Element));
3417        unsigned reverseMask = (groupSize - 1);
3418        j = i ^ reverseMask;
3419    '''
3420    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3421    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3422
    # Calls split('exec') and then appends vcompares and vcomparesL (both
    # defined outside this section) to exec_output.
    # NOTE(review): split() is not defined in this section; presumably it
    # starts a new generated output file for the 'exec' stream — confirm.
3423    split('exec')
3424    exec_output += vcompares + vcomparesL
3425
    # Per-element C++ body that adds two source elements after casting
    # each to BigElement, so the sum is formed at the wider type.
    # Registered through twoRegCondenseInst for smallTypes as NVpaddlD and
    # NVpaddlQ.
3426    vpaddlCode = '''
3427        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3428    '''
3429    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3430    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3431
    # Per-element C++ body that accumulates the BigElement-wide sum of two
    # source elements into the existing destElem.
    # Registered through twoRegCondenseInst for smallTypes as NVpadalD and
    # NVpadalQ.
    # NOTE(review): the trailing True argument presumably tells the helper
    # to load the destination before running the body, since the code uses
    # "+=" — confirm against twoRegCondenseInst.
3432    vpadalCode = '''
3433        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3434    '''
3435    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3436    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3437
    # Per-element C++ body that counts how many bits directly below the
    # top bit match it. The source is shifted left once to drop the top
    # bit, then shifted repeatedly while its sign stays the same as the
    # original (negative / non-negative), counting each step. The count is
    # capped at (element width - 1).
    # Registered through twoRegMiscInst for signedTypes as NVclsD and
    # NVclsQ.
3438    vclsCode = '''
3439        unsigned count = 0;
3440        if (srcElem1 < 0) {
3441            srcElem1 <<= 1;
3442            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3443                count++;
3444                srcElem1 <<= 1;
3445            }
3446        } else {
3447            srcElem1 <<= 1;
3448            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3449                count++;
3450                srcElem1 <<= 1;
3451            }
3452        }
3453        destElem = count;
3454    '''
3455    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3456    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3457
    # Per-element C++ body that counts leading zero bits: the source is
    # shifted left while it compares >= 0 (top bit clear), counting each
    # step, with the count capped at the element width. The ">= 0" test is
    # why this is instantiated for signedTypes.
    # Registered through twoRegMiscInst as NVclzD and NVclzQ.
3458    vclzCode = '''
3459        unsigned count = 0;
3460        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3461            count++;
3462            srcElem1 <<= 1;
3463        }
3464        destElem = count;
3465    '''
3466    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3467    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3468
    # Per-element C++ body that counts set bits: the low bit is added to
    # count and the source shifted right until the source becomes zero or
    # the element width has been reached.
    # Registered through twoRegMiscInst for unsignedTypes as NVcntD and
    # NVcntQ.
3469    vcntCode = '''
3470        unsigned count = 0;
3471        while (srcElem1 && count < sizeof(Element) * 8) {
3472            count += srcElem1 & 0x1;
3473            srcElem1 >>= 1;
3474        }
3475        destElem = count;
3476    '''
3477
3478    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3479    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3480
    # Per-element C++ body for the register form of vmvn: bitwise
    # complement of the source. Registered through twoRegMiscInst for
    # ("uint64_t",) as NVmvnD and NVmvnQ. The name vmvnCode is reassigned
    # later in this file for the immediate form.
3481    vmvnCode = '''
3482        destElem = ~srcElem1;
3483    '''
3484    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3485    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3486
    # Per-element C++ body for a saturating absolute value. When the source
    # equals the value with only the top bit set (the one input whose
    # negation does not fit), fpscr.qc is set and the result is ~srcElem1;
    # otherwise the result is -srcElem1 for negative input and srcElem1 for
    # non-negative input.
    # Registered through twoRegMiscInst for signedTypes as NVqabsD and
    # NVqabsQ.
3487    vqabsCode = '''
3488        FPSCR fpscr = (FPSCR) FpscrQc;
3489        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3490            fpscr.qc = 1;
3491            destElem = ~srcElem1;
3492        } else if (srcElem1 < 0) {
3493            destElem = -srcElem1;
3494        } else {
3495            destElem = srcElem1;
3496        }
3497        FpscrQc = fpscr;
3498    '''
3499    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3500    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3501
    # Per-element C++ body for a saturating negation. When the source
    # equals the value with only the top bit set, fpscr.qc is set and the
    # result is ~srcElem1; otherwise the result is -srcElem1.
    # Registered through twoRegMiscInst for signedTypes as NVqnegD and
    # NVqnegQ.
3502    vqnegCode = '''
3503        FPSCR fpscr = (FPSCR) FpscrQc;
3504        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3505            fpscr.qc = 1;
3506            destElem = ~srcElem1;
3507        } else {
3508            destElem = -srcElem1;
3509        }
3510        FpscrQc = fpscr;
3511    '''
3512    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3513    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3514
    # vabsCode: per-element integer absolute value without saturation
    # (negative input is negated, other input copied); registered through
    # twoRegMiscInst for signedTypes as NVabsD and NVabsQ.
    # vabsfpCode: floating-point variant that views the value through a
    # uint32_t/float union and ANDs the bit pattern with
    # mask(sizeof(Element) * 8 - 1); registered through twoRegMiscInstFp
    # for ("float",) as NVabsDFp and NVabsQFp.
    # NOTE(review): this clears only the top (sign) bit provided Element is
    # a 32-bit type here — confirm.
3515    vabsCode = '''
3516        if (srcElem1 < 0) {
3517            destElem = -srcElem1;
3518        } else {
3519            destElem = srcElem1;
3520        }
3521    '''
3522
3523    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3524    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
3525    vabsfpCode = '''
3526        union
3527        {
3528            uint32_t i;
3529            float f;
3530        } cStruct;
3531        cStruct.f = srcReg1;
3532        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3533        destReg = cStruct.f;
3534    '''
3535    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3536    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3537
    # vnegCode: per-element integer negation, registered through
    # twoRegMiscInst for signedTypes as NVnegD and NVnegQ.
    # vnegfpCode: the same operation on the float register value,
    # registered through twoRegMiscInstFp for ("float",) as NVnegDFp and
    # NVnegQFp.
3538    vnegCode = '''
3539        destElem = -srcElem1;
3540    '''
3541    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3542    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3543    vnegfpCode = '''
3544        destReg = -srcReg1;
3545    '''
3546    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3547    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3548
    # Compare-with-zero, "greater than".
    # vcgtCode: integer form; the element becomes all ones when
    # srcElem1 > 0 and 0 otherwise (signedTypes, NVcgtD / NVcgtQ).
    # vcgtfpCode: float form; runs binaryOp() with vcgtFunc against 0.0,
    # stores -1 in destReg when the returned value equals 0 and 0
    # otherwise, and sets fpscr.ioc when the returned value equals 2.0.
    # NOTE(review): the meaning of the 0 and 2.0 return values is defined
    # by vcgtFunc, which is outside this section — confirm there.
3549    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3550    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3551    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3552    vcgtfpCode = '''
3553        FPSCR fpscr = (FPSCR) FpscrExc;
3554        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3555                             true, true, VfpRoundNearest);
3556        destReg = (res == 0) ? -1 : 0;
3557        if (res == 2.0)
3558            fpscr.ioc = 1;
3559        FpscrExc = fpscr;
3560    '''
3561    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3562            2, vcgtfpCode, toInt = True)
3563    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3564            4, vcgtfpCode, toInt = True)
3565
    # Compare-with-zero, "greater than or equal".
    # vcgeCode: integer form; the element becomes all ones when
    # srcElem1 >= 0 and 0 otherwise (signedTypes, NVcgeD / NVcgeQ).
    # vcgefpCode: float form; runs binaryOp() with vcgeFunc against 0.0,
    # stores -1 in destReg when the returned value equals 0 and 0
    # otherwise, and sets fpscr.ioc when the returned value equals 2.0.
    # NOTE(review): the meaning of the 0 and 2.0 return values is defined
    # by vcgeFunc, which is outside this section — confirm there.
3566    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3567    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3568    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3569    vcgefpCode = '''
3570        FPSCR fpscr = (FPSCR) FpscrExc;
3571        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3572                             true, true, VfpRoundNearest);
3573        destReg = (res == 0) ? -1 : 0;
3574        if (res == 2.0)
3575            fpscr.ioc = 1;
3576        FpscrExc = fpscr;
3577    '''
3578    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3579            2, vcgefpCode, toInt = True)
3580    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3581            4, vcgefpCode, toInt = True)
3582
    # Compare-with-zero, "equal".
    # vceqCode: integer form; the element becomes all ones when
    # srcElem1 == 0 and 0 otherwise (signedTypes, NVceqD / NVceqQ).
    # vceqfpCode: float form; runs binaryOp() with vceqFunc against 0.0,
    # stores -1 in destReg when the returned value equals 0 and 0
    # otherwise, and sets fpscr.ioc when the returned value equals 2.0.
    # NOTE(review): the meaning of the 0 and 2.0 return values is defined
    # by vceqFunc, which is outside this section — confirm there.
3583    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3584    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3585    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3586    vceqfpCode = '''
3587        FPSCR fpscr = (FPSCR) FpscrExc;
3588        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3589                             true, true, VfpRoundNearest);
3590        destReg = (res == 0) ? -1 : 0;
3591        if (res == 2.0)
3592            fpscr.ioc = 1;
3593        FpscrExc = fpscr;
3594    '''
3595    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3596            2, vceqfpCode, toInt = True)
3597    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3598            4, vceqfpCode, toInt = True)
3599
    # Compare-with-zero, "less than or equal".
    # vcleCode: integer form; the element becomes all ones when
    # srcElem1 <= 0 and 0 otherwise (signedTypes, NVcleD / NVcleQ).
    # vclefpCode: float form; runs binaryOp() with vcleFunc against 0.0,
    # stores -1 in destReg when the returned value equals 0 and 0
    # otherwise, and sets fpscr.ioc when the returned value equals 2.0.
    # NOTE(review): the meaning of the 0 and 2.0 return values is defined
    # by vcleFunc, which is outside this section — confirm there.
3600    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3601    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3602    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3603    vclefpCode = '''
3604        FPSCR fpscr = (FPSCR) FpscrExc;
3605        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3606                             true, true, VfpRoundNearest);
3607        destReg = (res == 0) ? -1 : 0;
3608        if (res == 2.0)
3609            fpscr.ioc = 1;
3610        FpscrExc = fpscr;
3611    '''
3612    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3613            2, vclefpCode, toInt = True)
3614    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3615            4, vclefpCode, toInt = True)
3616
    # Compare-with-zero, "less than".
    # vcltCode: integer form; the element becomes all ones when
    # srcElem1 < 0 and 0 otherwise (signedTypes, NVcltD / NVcltQ).
    # vcltfpCode: float form; runs binaryOp() with vcltFunc against 0.0,
    # stores -1 in destReg when the returned value equals 0 and 0
    # otherwise, and sets fpscr.ioc when the returned value equals 2.0.
    # NOTE(review): the meaning of the 0 and 2.0 return values is defined
    # by vcltFunc, which is outside this section — confirm there.
3617    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3618    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3619    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3620    vcltfpCode = '''
3621        FPSCR fpscr = (FPSCR) FpscrExc;
3622        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3623                             true, true, VfpRoundNearest);
3624        destReg = (res == 0) ? -1 : 0;
3625        if (res == 2.0)
3626            fpscr.ioc = 1;
3627        FpscrExc = fpscr;
3628    '''
3629    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3630            2, vcltfpCode, toInt = True)
3631    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3632            4, vcltfpCode, toInt = True)
3633
    # C++ body that exchanges srcReg1.regs[r] and destReg.regs[r] for
    # every r below rCount, using mid as the temporary. Both operands are
    # modified. Registered through twoRegMiscScramble for ("uint64_t",) as
    # NVswpD and NVswpQ.
3634    vswpCode = '''
3635        FloatRegBits mid;
3636        for (unsigned r = 0; r < rCount; r++) {
3637            mid = srcReg1.regs[r];
3638            srcReg1.regs[r] = destReg.regs[r];
3639            destReg.regs[r] = mid;
3640        }
3641    '''
3642    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3643    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3644
    # C++ body that, for every even index i, exchanges
    # srcReg1.elements[i] with destReg.elements[i + 1] (mid is the
    # temporary). Both operands are modified. Registered through
    # twoRegMiscScramble for smallUnsignedTypes as NVtrnD and NVtrnQ.
3645    vtrnCode = '''
3646        Element mid;
3647        for (unsigned i = 0; i < eCount; i += 2) {
3648            mid = srcReg1.elements[i];
3649            srcReg1.elements[i] = destReg.elements[i + 1];
3650            destReg.elements[i + 1] = mid;
3651        }
3652    '''
3653    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3654            smallUnsignedTypes, 2, vtrnCode)
3655    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3656            smallUnsignedTypes, 4, vtrnCode)
3657
    # C++ body that de-interleaves the pair (destReg, srcReg1) in place.
    # mid keeps a copy of the original srcReg1. Afterwards destReg holds
    # the even-indexed elements (first those of destReg, then those of the
    # saved srcReg1) and srcReg1 holds the odd-indexed elements in the same
    # order. The first loop may overwrite destReg in place because it only
    # reads indices 2*i and 2*i + 1, which are never below the index i
    # being written. Registered through twoRegMiscScramble for
    # unsignedTypes as NVuzpD and NVuzpQ.
3658    vuzpCode = '''
3659        Element mid[eCount];
3660        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3661        for (unsigned i = 0; i < eCount / 2; i++) {
3662            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3663            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3664            destReg.elements[i] = destReg.elements[2 * i];
3665        }
3666        for (unsigned i = 0; i < eCount / 2; i++) {
3667            destReg.elements[eCount / 2 + i] = mid[2 * i];
3668        }
3669    '''
3670    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3671    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3672
3673    vzipCode = '''
3674        Element mid[eCount];
3675        memcpy(&mid, &destReg, sizeof(destReg));
3676        for (unsigned i = 0; i < eCount / 2; i++) {
3677            destReg.elements[2 * i] = mid[i];
3678            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3679        }
3680        for (int i = 0; i < eCount / 2; i++) {
3681            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3682            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3683        }
3684    '''
3685    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3686    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3687
    # Per-element C++ body that assigns the source element to the
    # destination element with no saturation check. Registered through
    # twoRegNarrowMiscInst for smallUnsignedTypes as NVmovn.
3688    vmovnCode = 'destElem = srcElem1;'
3689    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3690
    # Per-element C++ body that copies srcElem1 into destElem. Registered
    # through twoRegMiscScInst for smallUnsignedTypes as NVdupD and NVdupQ.
    # NOTE(review): presumably twoRegMiscScInst supplies the same selected
    # source element for every destination element — confirm against that
    # helper.
3691    vdupCode = 'destElem = srcElem1;'
3692    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3693    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3694
    # Generates an instruction class that fills every element of the
    # destination vector with Op1 cast to Element.
    #   name    - mnemonic passed to InstObjParams
    #   Name    - generated class name
    #   opClass - value stored under "op_class"
    #   types   - element types for which NeonExecDeclare is instantiated
    #   rCount  - number of FpDestP<n>_uw destination pieces written
    # Appends to the global header_output and exec_output.
3695    def vdupGprInst(name, Name, opClass, types, rCount):
3696        global header_output, exec_output
3697        eWalkCode = simdEnabledCheckCode + '''
3698        RegVect destReg;
3699        for (unsigned i = 0; i < eCount; i++) {
3700            destReg.elements[i] = htog((Element)Op1);
3701        }
3702        '''
        # Emit one write-back statement per destination register piece.
3703        for reg in range(rCount):
3704            eWalkCode += '''
3705            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3706            ''' % { "reg" : reg }
        # Build the instruction description on the "RegRegOp" base class.
3707        iop = InstObjParams(name, Name,
3708                            "RegRegOp",
3709                            { "code": eWalkCode,
3710                              "r_count": rCount,
3711                              "predicate_test": predicateTest,
3712                              "op_class": opClass }, [])
3713        header_output += NeonRegRegOpDeclare.subst(iop)
3714        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
3715        for type in types:
3716            substDict = { "targs" : type,
3717                          "class_name" : Name }
3718            exec_output += NeonExecDeclare.subst(substDict)
3719    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3720    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3721
    # Per-element C++ body that stores the immediate in the destination.
    # Registered through oneRegImmInst for ("uint64_t",) as NVmoviD and
    # NVmoviQ.
3722    vmovCode = 'destElem = imm;'
3723    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3724    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3725
    # Per-element C++ body that ORs the immediate into the existing
    # destination value. Registered through oneRegImmInst for
    # ("uint64_t",) as NVorriD and NVorriQ.
    # NOTE(review): the trailing True argument presumably makes the helper
    # read the destination first, since the code uses "|=" — confirm
    # against oneRegImmInst.
3726    vorrCode = 'destElem |= imm;'
3727    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3728    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3729
    # Immediate form of vmvn: stores the bitwise complement of the
    # immediate. This reassigns the vmvnCode name used earlier for the
    # register form. Registered through oneRegImmInst for ("uint64_t",) as
    # NVmvniD and NVmvniQ.
3730    vmvnCode = 'destElem = ~imm;'
3731    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3732    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3733
    # Per-element C++ body that clears, in the existing destination value,
    # every bit that is set in the immediate. Registered through
    # oneRegImmInst for ("uint64_t",) as NVbiciD and NVbiciQ.
    # NOTE(review): the trailing True argument presumably makes the helper
    # read the destination first, since the code uses "&=" — confirm
    # against oneRegImmInst.
3734    vbicCode = 'destElem &= ~imm;'
3735    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3736    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3737
    # Per-element C++ body for a saturating narrowing move. The source is
    # assigned to destElem; if widening destElem back to BigElement does
    # not reproduce the source, fpscr.qc is set and the result saturates to
    # mask(width - 1), complemented for a negative source.
    # Registered through twoRegNarrowMiscInst for smallSignedTypes as
    # NVqmovn.
3738    vqmovnCode = '''
3739    FPSCR fpscr = (FPSCR) FpscrQc;
3740    destElem = srcElem1;
3741    if ((BigElement)destElem != srcElem1) {
3742        fpscr.qc = 1;
3743        destElem = mask(sizeof(Element) * 8 - 1);
3744        if (srcElem1 < 0)
3745            destElem = ~destElem;
3746    }
3747    FpscrQc = fpscr;
3748    '''
3749    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3750
    # Per-element C++ body for a saturating narrowing move with an
    # all-ones saturation value. The source is assigned to destElem; if
    # widening destElem back to BigElement does not reproduce the source,
    # fpscr.qc is set and the result becomes mask(sizeof(Element) * 8).
    # Registered through twoRegNarrowMiscInst for smallUnsignedTypes as
    # NVqmovun.
3751    vqmovunCode = '''
3752    FPSCR fpscr = (FPSCR) FpscrQc;
3753    destElem = srcElem1;
3754    if ((BigElement)destElem != srcElem1) {
3755        fpscr.qc = 1;
3756        destElem = mask(sizeof(Element) * 8);
3757    }
3758    FpscrQc = fpscr;
3759    '''
3760    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3761            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3762
    # Per-element C++ body for a saturating narrowing move where the source
    # is tested as signed and the result saturates to [0, all ones]. If the
    # source is negative, or the low Element-width bits of destElem do not
    # reproduce the source, fpscr.qc is set and destElem becomes all ones;
    # for a negative source that value is then complemented, giving 0.
    # Registered through twoRegNarrowMiscInst for smallSignedTypes; the
    # mnemonic is "vqmovun" while the class name is NVqmovuns.
3763    vqmovunsCode = '''
3764    FPSCR fpscr = (FPSCR) FpscrQc;
3765    destElem = srcElem1;
3766    if (srcElem1 < 0 ||
3767            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3768        fpscr.qc = 1;
3769        destElem = mask(sizeof(Element) * 8);
3770        if (srcElem1 < 0)
3771            destElem = ~destElem;
3772    }
3773    FpscrQc = fpscr;
3774    '''
3775    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3776            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3777
    # Generates a three-register-plus-immediate instruction class whose
    # body is the C++ snippet `op`.
    #   name    - mnemonic passed to InstObjParams
    #   Name    - generated class name
    #   opClass - value stored under "op_class"
    #   types   - element types for which NeonExecDeclare is instantiated
    #   rCount  - number of FpOp1P<n>/FpOp2P<n>/FpDestP<n> pieces handled
    #   op      - C++ code placed between operand load and result write-back
    # Appends to the global header_output and exec_output.
3778    def buildVext(name, Name, opClass, types, rCount, op):
3779        global header_output, exec_output
3780        eWalkCode = simdEnabledCheckCode + '''
3781        RegVect srcReg1, srcReg2, destReg;
3782        '''
        # Load both source operands, one register piece at a time.
3783        for reg in range(rCount):
3784            eWalkCode += '''
3785                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3786                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3787            ''' % { "reg" : reg }
3788        eWalkCode += op
        # Write the result back, one register piece at a time.
3789        for reg in range(rCount):
3790            eWalkCode += '''
3791            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3792            ''' % { "reg" : reg }
        # Build the instruction description on the "RegRegRegImmOp" base
        # class.
3793        iop = InstObjParams(name, Name,
3794                            "RegRegRegImmOp",
3795                            { "code": eWalkCode,
3796                              "r_count": rCount,
3797                              "predicate_test": predicateTest,
3798                              "op_class": opClass }, [])
3799        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3800        exec_output += NeonEqualRegExecute.subst(iop)
        # One NeonExecDeclare substitution per requested element type.
3801        for type in types:
3802            substDict = { "targs" : type,
3803                          "class_name" : Name }
3804            exec_output += NeonExecDeclare.subst(substDict)
3805
    # C++ body for vext. For each destination index i the element is taken
    # at index (i + imm) from the concatenation of srcReg1 followed by
    # srcReg2: from srcReg1 while the index is below eCount, otherwise from
    # srcReg2 at (index - eCount). If that reduced index is still not below
    # eCount, fault is set to an UndefinedInstruction instead of writing
    # the element.
    # Registered through buildVext for ("uint8_t",) as NVextD and NVextQ.
3806    vextCode = '''
3807        for (unsigned i = 0; i < eCount; i++) {
3808            unsigned index = i + imm;
3809            if (index < eCount) {
3810                destReg.elements[i] = srcReg1.elements[index];
3811            } else {
3812                index -= eCount;
3813                if (index >= eCount) {
3814                    fault = std::make_shared<UndefinedInstruction>(machInst,
3815                                                                   false,
3816                                                                   mnemonic);
3817                } else {
3818                    destReg.elements[i] = srcReg2.elements[index];
3819                }
3820            }
3821        }
3822    '''
3823    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3824    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3825
3826    def buildVtbxl(name, Name, opClass, length, isVtbl):
3827        global header_output, decoder_output, exec_output
3828        code = simdEnabledCheckCode + '''
3829            union
3830            {
3831                uint8_t bytes[32];
3832                FloatRegBits regs[8];
3833            } table;
3834
3835            union
3836            {
3837                uint8_t bytes[8];
3838                FloatRegBits regs[2];
3839            } destReg, srcReg2;
3840
3841            const unsigned length = %(length)d;
3842            const bool isVtbl = %(isVtbl)s;
3843
3844            srcReg2.regs[0] = htog(FpOp2P0_uw);
3845            srcReg2.regs[1] = htog(FpOp2P1_uw);
3846
3847            destReg.regs[0] = htog(FpDestP0_uw);
3848            destReg.regs[1] = htog(FpDestP1_uw);
3849        ''' % { "length" : length, "isVtbl" : isVtbl }
3850        for reg in range(8):
3851            if reg < length * 2:
3852                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3853                        { "reg" : reg }
3854            else:
3855                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3856        code += '''
3857        for (unsigned i = 0; i < sizeof(destReg); i++) {
3858            uint8_t index = srcReg2.bytes[i];
3859            if (index < 8 * length) {
3860                destReg.bytes[i] = table.bytes[index];
3861            } else {
3862                if (isVtbl)
3863                    destReg.bytes[i] = 0;
3864                // else destReg.bytes[i] unchanged
3865            }
3866        }
3867
3868        FpDestP0_uw = gtoh(destReg.regs[0]);
3869        FpDestP1_uw = gtoh(destReg.regs[1]);
3870        '''
3871        iop = InstObjParams(name, Name,
3872                            "RegRegRegOp",
3873                            { "code": code,
3874                              "predicate_test": predicateTest,
3875                              "op_class": opClass }, [])
3876        header_output += RegRegRegOpDeclare.subst(iop)
3877        decoder_output += RegRegRegOpConstructor.subst(iop)
3878        exec_output += PredOpExecute.subst(iop)
3879
3880    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3881    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3882    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3883    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3884
3885    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3886    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3887    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3888    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3889}};
3890