1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <class BaseS, class BaseD>
62    StaticInstPtr
63    decodeNeonSizeSingleDouble(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 2:
69            return new BaseS(machInst, dest, op1, op2);
70          case 3:
71            return new BaseD(machInst, dest, op1, op2);
72          default:
73            return new Unknown(machInst);
74        }
75    }
76
77    template <template <typename T> class Base>
78    StaticInstPtr
79    decodeNeonSThreeUReg(unsigned size,
80                         ExtMachInst machInst, IntRegIndex dest,
81                         IntRegIndex op1, IntRegIndex op2)
82    {
83        switch (size) {
84          case 0:
85            return new Base<int8_t>(machInst, dest, op1, op2);
86          case 1:
87            return new Base<int16_t>(machInst, dest, op1, op2);
88          case 2:
89            return new Base<int32_t>(machInst, dest, op1, op2);
90          case 3:
91            return new Base<int64_t>(machInst, dest, op1, op2);
92          default:
93            return new Unknown(machInst);
94        }
95    }
96
97    template <template <typename T> class Base>
98    StaticInstPtr
99    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
100                          ExtMachInst machInst, IntRegIndex dest,
101                          IntRegIndex op1, IntRegIndex op2)
102    {
103        if (notSigned) {
104            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
105        } else {
106            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
107        }
108    }
109
110    template <template <typename T> class Base>
111    StaticInstPtr
112    decodeNeonUThreeUSReg(unsigned size,
113                          ExtMachInst machInst, IntRegIndex dest,
114                          IntRegIndex op1, IntRegIndex op2)
115    {
116        switch (size) {
117          case 0:
118            return new Base<uint8_t>(machInst, dest, op1, op2);
119          case 1:
120            return new Base<uint16_t>(machInst, dest, op1, op2);
121          case 2:
122            return new Base<uint32_t>(machInst, dest, op1, op2);
123          default:
124            return new Unknown(machInst);
125        }
126    }
127
128    template <template <typename T> class Base>
129    StaticInstPtr
130    decodeNeonSThreeUSReg(unsigned size,
131                          ExtMachInst machInst, IntRegIndex dest,
132                          IntRegIndex op1, IntRegIndex op2)
133    {
134        switch (size) {
135          case 0:
136            return new Base<int8_t>(machInst, dest, op1, op2);
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
149                             IntRegIndex dest, IntRegIndex op1,
150                             IntRegIndex op2)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
165                                IntRegIndex dest, IntRegIndex op1,
166                                IntRegIndex op2, uint64_t imm)
167    {
168        switch (size) {
169          case 1:
170            return new Base<int16_t>(machInst, dest, op1, op2, imm);
171          case 2:
172            return new Base<int32_t>(machInst, dest, op1, op2, imm);
173          default:
174            return new Unknown(machInst);
175        }
176    }
177
178    template <template <typename T> class Base>
179    StaticInstPtr
180    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
181                           ExtMachInst machInst, IntRegIndex dest,
182                           IntRegIndex op1, IntRegIndex op2)
183    {
184        if (notSigned) {
185            return decodeNeonUThreeUSReg<Base>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonSThreeUSReg<Base>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonUThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonUThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonUThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeSReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUSReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonSThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonSThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonSThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUThreeXReg(bool q, unsigned size,
245                         ExtMachInst machInst, IntRegIndex dest,
246                         IntRegIndex op1, IntRegIndex op2)
247    {
248        if (q) {
249            return decodeNeonUThreeUReg<BaseQ>(
250                    size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonUThreeUSReg<BaseD>(
253                    size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
261                          ExtMachInst machInst, IntRegIndex dest,
262                          IntRegIndex op1, IntRegIndex op2)
263    {
264        if (notSigned) {
265            return decodeNeonUThreeSReg<BaseD, BaseQ>(
266                    q, size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonSThreeSReg<BaseD, BaseQ>(
269                    q, size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonUThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonUThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonUThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonSThreeReg(bool q, unsigned size,
293                        ExtMachInst machInst, IntRegIndex dest,
294                        IntRegIndex op1, IntRegIndex op2)
295    {
296        if (q) {
297            return decodeNeonSThreeUReg<BaseQ>(
298                    size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeUReg<BaseD>(
301                    size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
309                         ExtMachInst machInst, IntRegIndex dest,
310                         IntRegIndex op1, IntRegIndex op2)
311    {
312        if (notSigned) {
313            return decodeNeonUThreeReg<BaseD, BaseQ>(
314                    q, size, machInst, dest, op1, op2);
315        } else {
316            return decodeNeonSThreeReg<BaseD, BaseQ>(
317                    q, size, machInst, dest, op1, op2);
318        }
319    }
320
321    template <template <typename T> class BaseD,
322              template <typename T> class BaseQ>
323    StaticInstPtr
324    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
325                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
326    {
327        if (q) {
328            if (size)
329                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
330            else
331                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
332        } else {
333            if (size)
334                return new Unknown(machInst);
335            else
336                return new BaseD<uint32_t>(machInst, dest, op1, op2);
337        }
338    }
339
340    template <template <typename T> class Base>
341    StaticInstPtr
342    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
343                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
344    {
345        if (size)
346            return new Base<uint64_t>(machInst, dest, op1, op2);
347        else
348            return new Base<uint32_t>(machInst, dest, op1, op2);
349    }
350
351    template <template <typename T> class Base>
352    StaticInstPtr
353    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
354                               IntRegIndex dest, IntRegIndex op1,
355                               IntRegIndex op2, uint64_t imm)
356    {
357        if (size)
358            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
359        else
360            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
361    }
362
363    template <template <typename T> class BaseD,
364              template <typename T> class BaseQ>
365    StaticInstPtr
366    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
367                                IntRegIndex dest, IntRegIndex op1,
368                                IntRegIndex op2, uint64_t imm)
369    {
370        if (q) {
371            switch (size) {
372              case 1:
373                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
374              case 2:
375                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
376              default:
377                return new Unknown(machInst);
378            }
379        } else {
380            switch (size) {
381              case 1:
382                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
383              case 2:
384                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
385              default:
386                return new Unknown(machInst);
387            }
388        }
389    }
390
391    template <template <typename T> class BaseD,
392              template <typename T> class BaseQ>
393    StaticInstPtr
394    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
395                                IntRegIndex dest, IntRegIndex op1,
396                                IntRegIndex op2, uint64_t imm)
397    {
398        if (q) {
399            switch (size) {
400              case 1:
401                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
402              case 2:
403                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
404              default:
405                return new Unknown(machInst);
406            }
407        } else {
408            switch (size) {
409              case 1:
410                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
411              case 2:
412                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
413              default:
414                return new Unknown(machInst);
415            }
416        }
417    }
418
419    template <template <typename T> class BaseD,
420              template <typename T> class BaseQ>
421    StaticInstPtr
422    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
423                             IntRegIndex dest, IntRegIndex op1,
424                             IntRegIndex op2, uint64_t imm)
425    {
426        if (q) {
427            if (size)
428                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
429            else
430                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
431        } else {
432            if (size)
433                return new Unknown(machInst);
434            else
435                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
436        }
437    }
438
439    template <template <typename T> class BaseD,
440              template <typename T> class BaseQ>
441    StaticInstPtr
442    decodeNeonUTwoShiftReg(bool q, unsigned size,
443                           ExtMachInst machInst, IntRegIndex dest,
444                           IntRegIndex op1, uint64_t imm)
445    {
446        if (q) {
447            switch (size) {
448              case 0:
449                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
450              case 1:
451                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
452              case 2:
453                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
454              case 3:
455                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
456              default:
457                return new Unknown(machInst);
458            }
459        } else {
460            switch (size) {
461              case 0:
462                return new BaseD<uint8_t>(machInst, dest, op1, imm);
463              case 1:
464                return new BaseD<uint16_t>(machInst, dest, op1, imm);
465              case 2:
466                return new BaseD<uint32_t>(machInst, dest, op1, imm);
467              case 3:
468                return new BaseD<uint64_t>(machInst, dest, op1, imm);
469              default:
470                return new Unknown(machInst);
471            }
472        }
473    }
474
475    template <template <typename T> class BaseD,
476              template <typename T> class BaseQ>
477    StaticInstPtr
478    decodeNeonSTwoShiftReg(bool q, unsigned size,
479                           ExtMachInst machInst, IntRegIndex dest,
480                           IntRegIndex op1, uint64_t imm)
481    {
482        if (q) {
483            switch (size) {
484              case 0:
485                return new BaseQ<int8_t>(machInst, dest, op1, imm);
486              case 1:
487                return new BaseQ<int16_t>(machInst, dest, op1, imm);
488              case 2:
489                return new BaseQ<int32_t>(machInst, dest, op1, imm);
490              case 3:
491                return new BaseQ<int64_t>(machInst, dest, op1, imm);
492              default:
493                return new Unknown(machInst);
494            }
495        } else {
496            switch (size) {
497              case 0:
498                return new BaseD<int8_t>(machInst, dest, op1, imm);
499              case 1:
500                return new BaseD<int16_t>(machInst, dest, op1, imm);
501              case 2:
502                return new BaseD<int32_t>(machInst, dest, op1, imm);
503              case 3:
504                return new BaseD<int64_t>(machInst, dest, op1, imm);
505              default:
506                return new Unknown(machInst);
507            }
508        }
509    }
510
511
512    template <template <typename T> class BaseD,
513              template <typename T> class BaseQ>
514    StaticInstPtr
515    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
516                            ExtMachInst machInst, IntRegIndex dest,
517                            IntRegIndex op1, uint64_t imm)
518    {
519        if (notSigned) {
520            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
521                    q, size, machInst, dest, op1, imm);
522        } else {
523            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
524                    q, size, machInst, dest, op1, imm);
525        }
526    }
527
528    template <template <typename T> class Base>
529    StaticInstPtr
530    decodeNeonUTwoShiftUSReg(unsigned size,
531                             ExtMachInst machInst, IntRegIndex dest,
532                             IntRegIndex op1, uint64_t imm)
533    {
534        switch (size) {
535          case 0:
536            return new Base<uint8_t>(machInst, dest, op1, imm);
537          case 1:
538            return new Base<uint16_t>(machInst, dest, op1, imm);
539          case 2:
540            return new Base<uint32_t>(machInst, dest, op1, imm);
541          default:
542            return new Unknown(machInst);
543        }
544    }
545
546    template <template <typename T> class Base>
547    StaticInstPtr
548    decodeNeonUTwoShiftUReg(unsigned size,
549                            ExtMachInst machInst, IntRegIndex dest,
550                            IntRegIndex op1, uint64_t imm)
551    {
552        switch (size) {
553          case 0:
554            return new Base<uint8_t>(machInst, dest, op1, imm);
555          case 1:
556            return new Base<uint16_t>(machInst, dest, op1, imm);
557          case 2:
558            return new Base<uint32_t>(machInst, dest, op1, imm);
559          case 3:
560            return new Base<uint64_t>(machInst, dest, op1, imm);
561          default:
562            return new Unknown(machInst);
563        }
564    }
565
566    template <template <typename T> class Base>
567    StaticInstPtr
568    decodeNeonSTwoShiftUReg(unsigned size,
569                            ExtMachInst machInst, IntRegIndex dest,
570                            IntRegIndex op1, uint64_t imm)
571    {
572        switch (size) {
573          case 0:
574            return new Base<int8_t>(machInst, dest, op1, imm);
575          case 1:
576            return new Base<int16_t>(machInst, dest, op1, imm);
577          case 2:
578            return new Base<int32_t>(machInst, dest, op1, imm);
579          case 3:
580            return new Base<int64_t>(machInst, dest, op1, imm);
581          default:
582            return new Unknown(machInst);
583        }
584    }
585
586    template <template <typename T> class BaseD,
587              template <typename T> class BaseQ>
588    StaticInstPtr
589    decodeNeonUTwoShiftSReg(bool q, unsigned size,
590                            ExtMachInst machInst, IntRegIndex dest,
591                            IntRegIndex op1, uint64_t imm)
592    {
593        if (q) {
594            return decodeNeonUTwoShiftUSReg<BaseQ>(
595                    size, machInst, dest, op1, imm);
596        } else {
597            return decodeNeonUTwoShiftUSReg<BaseD>(
598                    size, machInst, dest, op1, imm);
599        }
600    }
601
602    template <template <typename T> class Base>
603    StaticInstPtr
604    decodeNeonSTwoShiftUSReg(unsigned size,
605                             ExtMachInst machInst, IntRegIndex dest,
606                             IntRegIndex op1, uint64_t imm)
607    {
608        switch (size) {
609          case 0:
610            return new Base<int8_t>(machInst, dest, op1, imm);
611          case 1:
612            return new Base<int16_t>(machInst, dest, op1, imm);
613          case 2:
614            return new Base<int32_t>(machInst, dest, op1, imm);
615          default:
616            return new Unknown(machInst);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonSTwoShiftSReg(bool q, unsigned size,
624                            ExtMachInst machInst, IntRegIndex dest,
625                            IntRegIndex op1, uint64_t imm)
626    {
627        if (q) {
628            return decodeNeonSTwoShiftUSReg<BaseQ>(
629                    size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftUSReg<BaseD>(
632                    size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
640                             ExtMachInst machInst, IntRegIndex dest,
641                             IntRegIndex op1, uint64_t imm)
642    {
643        if (notSigned) {
644            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
645                    q, size, machInst, dest, op1, imm);
646        } else {
647            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
648                    q, size, machInst, dest, op1, imm);
649        }
650    }
651
652    template <template <typename T> class BaseD,
653              template <typename T> class BaseQ>
654    StaticInstPtr
655    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
656                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
657    {
658        if (q) {
659            return decodeNeonUTwoShiftUReg<BaseQ>(
660                size, machInst, dest, op1, imm);
661        } else {
662            return decodeNeonUTwoShiftUSReg<BaseD>(
663                size, machInst, dest, op1, imm);
664        }
665    }
666
667    template <template <typename T> class BaseD,
668              template <typename T> class BaseQ>
669    StaticInstPtr
670    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
671                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
672    {
673        if (q) {
674            return decodeNeonSTwoShiftUReg<BaseQ>(
675                size, machInst, dest, op1, imm);
676        } else {
677            return decodeNeonSTwoShiftUSReg<BaseD>(
678                size, machInst, dest, op1, imm);
679        }
680    }
681
682    template <template <typename T> class Base>
683    StaticInstPtr
684    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
685                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
686    {
687        if (size)
688            return new Base<uint64_t>(machInst, dest, op1, imm);
689        else
690            return new Base<uint32_t>(machInst, dest, op1, imm);
691    }
692
693    template <template <typename T> class BaseD,
694              template <typename T> class BaseQ>
695    StaticInstPtr
696    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
697                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
698    {
699        if (q) {
700            if (size)
701                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
702            else
703                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
704        } else {
705            if (size)
706                return new Unknown(machInst);
707            else
708                return new BaseD<uint32_t>(machInst, dest, op1, imm);
709        }
710    }
711
712    template <template <typename T> class Base>
713    StaticInstPtr
714    decodeNeonUTwoMiscUSReg(unsigned size,
715                            ExtMachInst machInst, IntRegIndex dest,
716                            IntRegIndex op1)
717    {
718        switch (size) {
719          case 0:
720            return new Base<uint8_t>(machInst, dest, op1);
721          case 1:
722            return new Base<uint16_t>(machInst, dest, op1);
723          case 2:
724            return new Base<uint32_t>(machInst, dest, op1);
725          default:
726            return new Unknown(machInst);
727        }
728    }
729
730    template <template <typename T> class Base>
731    StaticInstPtr
732    decodeNeonSTwoMiscUSReg(unsigned size,
733                            ExtMachInst machInst, IntRegIndex dest,
734                            IntRegIndex op1)
735    {
736        switch (size) {
737          case 0:
738            return new Base<int8_t>(machInst, dest, op1);
739          case 1:
740            return new Base<int16_t>(machInst, dest, op1);
741          case 2:
742            return new Base<int32_t>(machInst, dest, op1);
743          default:
744            return new Unknown(machInst);
745        }
746    }
747
748    template <template <typename T> class BaseD,
749              template <typename T> class BaseQ>
750    StaticInstPtr
751    decodeNeonUTwoMiscSReg(bool q, unsigned size,
752                           ExtMachInst machInst, IntRegIndex dest,
753                           IntRegIndex op1)
754    {
755        if (q) {
756            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
757        } else {
758            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
759        }
760    }
761
762    template <template <typename T> class BaseD,
763              template <typename T> class BaseQ>
764    StaticInstPtr
765    decodeNeonSTwoMiscSReg(bool q, unsigned size,
766                           ExtMachInst machInst, IntRegIndex dest,
767                           IntRegIndex op1)
768    {
769        if (q) {
770            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
771        } else {
772            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
773        }
774    }
775
776    template <template <typename T> class Base>
777    StaticInstPtr
778    decodeNeonUTwoMiscUReg(unsigned size,
779                           ExtMachInst machInst, IntRegIndex dest,
780                           IntRegIndex op1)
781    {
782        switch (size) {
783          case 0:
784            return new Base<uint8_t>(machInst, dest, op1);
785          case 1:
786            return new Base<uint16_t>(machInst, dest, op1);
787          case 2:
788            return new Base<uint32_t>(machInst, dest, op1);
789          case 3:
790            return new Base<uint64_t>(machInst, dest, op1);
791          default:
792            return new Unknown(machInst);
793        }
794    }
795
796    template <template <typename T> class Base>
797    StaticInstPtr
798    decodeNeonSTwoMiscUReg(unsigned size,
799                           ExtMachInst machInst, IntRegIndex dest,
800                           IntRegIndex op1)
801    {
802        switch (size) {
803          case 0:
804            return new Base<int8_t>(machInst, dest, op1);
805          case 1:
806            return new Base<int16_t>(machInst, dest, op1);
807          case 2:
808            return new Base<int32_t>(machInst, dest, op1);
809          case 3:
810            return new Base<int64_t>(machInst, dest, op1);
811          default:
812            return new Unknown(machInst);
813        }
814    }
815
816    template <template <typename T> class BaseD,
817              template <typename T> class BaseQ>
818    StaticInstPtr
819    decodeNeonSTwoMiscReg(bool q, unsigned size,
820                          ExtMachInst machInst, IntRegIndex dest,
821                          IntRegIndex op1)
822    {
823        if (q) {
824            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
825        } else {
826            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
827        }
828    }
829
830    template <template <typename T> class BaseD,
831              template <typename T> class BaseQ>
832    StaticInstPtr
833    decodeNeonUTwoMiscReg(bool q, unsigned size,
834                          ExtMachInst machInst, IntRegIndex dest,
835                          IntRegIndex op1)
836    {
837        if (q) {
838            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
839        } else {
840            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
848                            ExtMachInst machInst, IntRegIndex dest,
849                            IntRegIndex op1)
850    {
851        if (notSigned) {
852            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
853                    q, size, machInst, dest, op1);
854        } else {
855            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
856                    q, size, machInst, dest, op1);
857        }
858    }
859
860    template <template <typename T> class BaseD,
861              template <typename T> class BaseQ>
862    StaticInstPtr
863    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
864                           IntRegIndex dest, IntRegIndex op1)
865    {
866        if (q) {
867            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
868        } else {
869            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
870        }
871    }
872
873    template <template <typename T> class BaseD,
874              template <typename T> class BaseQ>
875    StaticInstPtr
876    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
877                           IntRegIndex dest, IntRegIndex op1)
878    {
879        if (q) {
880            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
881        } else {
882            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
883        }
884    }
885
886    template <template <typename T> class BaseD,
887              template <typename T> class BaseQ>
888    StaticInstPtr
889    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
890                            IntRegIndex dest, IntRegIndex op1)
891    {
892        if (q) {
893            if (size)
894                return new BaseQ<uint64_t>(machInst, dest, op1);
895            else
896                return new BaseQ<uint32_t>(machInst, dest, op1);
897        } else {
898            if (size)
899                return new Unknown(machInst);
900            else
901                return new BaseD<uint32_t>(machInst, dest, op1);
902        }
903    }
904
905    template <template <typename T> class BaseD,
906              template <typename T> class BaseQ>
907    StaticInstPtr
908    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
909                                   IntRegIndex dest, IntRegIndex op1)
910    {
911        if (size)
912            return new BaseQ<uint64_t>(machInst, dest, op1);
913        else
914            return new BaseD<uint32_t>(machInst, dest, op1);
915    }
916
917    template <template <typename T> class Base>
918    StaticInstPtr
919    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
920                              IntRegIndex dest, IntRegIndex op1)
921    {
922        if (size)
923            return new Base<uint64_t>(machInst, dest, op1);
924        else
925            return new Base<uint32_t>(machInst, dest, op1);
926    }
927
928    template <template <typename T> class BaseD,
929              template <typename T> class BaseQ>
930    StaticInstPtr
931    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
932                              IntRegIndex dest, IntRegIndex op1)
933    {
934        if (q) {
935            switch (size) {
936              case 0x0:
937                return new BaseQ<uint8_t>(machInst, dest, op1);
938              case 0x1:
939                return new BaseQ<uint16_t>(machInst, dest, op1);
940              case 0x2:
941                return new BaseQ<uint32_t>(machInst, dest, op1);
942              default:
943                return new Unknown(machInst);
944            }
945        } else {
946            switch (size) {
947              case 0x0:
948                return new BaseD<uint8_t>(machInst, dest, op1);
949              case 0x1:
950                return new BaseD<uint16_t>(machInst, dest, op1);
951              default:
952                return new Unknown(machInst);
953            }
954        }
955    }
956
957    template <template <typename T> class BaseD,
958              template <typename T> class BaseQ,
959              template <typename T> class BaseBQ>
960    StaticInstPtr
961    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
962                              IntRegIndex dest, IntRegIndex op1)
963    {
964        if (q) {
965            switch (size) {
966              case 0x0:
967                return new BaseQ<uint8_t>(machInst, dest, op1);
968              case 0x1:
969                return new BaseQ<uint16_t>(machInst, dest, op1);
970              case 0x2:
971                return new BaseBQ<uint32_t>(machInst, dest, op1);
972              default:
973                return new Unknown(machInst);
974            }
975        } else {
976            switch (size) {
977              case 0x0:
978                return new BaseD<uint8_t>(machInst, dest, op1);
979              case 0x1:
980                return new BaseD<uint16_t>(machInst, dest, op1);
981              default:
982                return new Unknown(machInst);
983            }
984        }
985    }
986
987    template <template <typename T> class BaseD,
988              template <typename T> class BaseQ>
989    StaticInstPtr
990    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
991                              IntRegIndex dest, IntRegIndex op1)
992    {
993        if (q) {
994            switch (size) {
995              case 0x0:
996                return new BaseQ<int8_t>(machInst, dest, op1);
997              case 0x1:
998                return new BaseQ<int16_t>(machInst, dest, op1);
999              case 0x2:
1000                return new BaseQ<int32_t>(machInst, dest, op1);
1001              default:
1002                return new Unknown(machInst);
1003            }
1004        } else {
1005            switch (size) {
1006              case 0x0:
1007                return new BaseD<int8_t>(machInst, dest, op1);
1008              case 0x1:
1009                return new BaseD<int16_t>(machInst, dest, op1);
1010              default:
1011                return new Unknown(machInst);
1012            }
1013        }
1014    }
1015
1016    template <template <typename T> class BaseD,
1017              template <typename T> class BaseQ,
1018              template <typename T> class BaseBQ>
1019    StaticInstPtr
1020    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1021                                  IntRegIndex dest, IntRegIndex op1)
1022    {
1023        if (q) {
1024            switch (size) {
1025              case 0x0:
1026                return new BaseQ<uint8_t>(machInst, dest, op1);
1027              case 0x1:
1028                return new BaseQ<uint16_t>(machInst, dest, op1);
1029              case 0x2:
1030                return new BaseBQ<uint32_t>(machInst, dest, op1);
1031              default:
1032                return new Unknown(machInst);
1033            }
1034        } else {
1035            switch (size) {
1036              case 0x0:
1037                return new BaseD<uint8_t>(machInst, dest, op1);
1038              case 0x1:
1039                return new BaseD<uint16_t>(machInst, dest, op1);
1040              default:
1041                return new Unknown(machInst);
1042            }
1043        }
1044    }
1045
1046    template <template <typename T> class BaseD,
1047              template <typename T> class BaseQ,
1048              template <typename T> class BaseBQ>
1049    StaticInstPtr
1050    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1051                                  IntRegIndex dest, IntRegIndex op1)
1052    {
1053        if (q) {
1054            switch (size) {
1055              case 0x0:
1056                return new BaseQ<int8_t>(machInst, dest, op1);
1057              case 0x1:
1058                return new BaseQ<int16_t>(machInst, dest, op1);
1059              case 0x2:
1060                return new BaseBQ<int32_t>(machInst, dest, op1);
1061              default:
1062                return new Unknown(machInst);
1063            }
1064        } else {
1065            switch (size) {
1066              case 0x0:
1067                return new BaseD<int8_t>(machInst, dest, op1);
1068              case 0x1:
1069                return new BaseD<int16_t>(machInst, dest, op1);
1070              default:
1071                return new Unknown(machInst);
1072            }
1073        }
1074    }
1075}};
1076
1077let {{
    # Accumulators for the generated text; the generator functions in this
    # block append to both.
    header_output = ""
    exec_output = ""

    # C++ source for the floating point compare helpers. Each helper returns
    # 2.0 when its NaN test on the operands fires, 0.0 when the comparison
    # holds and 1.0 when it does not. vceqFunc tests with isSnan(); the
    # other helpers test with std::isnan().
    vcompares = '''
    static float
    vcgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 > op2) ? 0.0 : 1.0;
    }

    static float
    vcgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 >= op2) ? 0.0 : 1.0;
    }

    static float
    vceqFunc(float op1, float op2)
    {
        if (isSnan(op1) || isSnan(op2))
            return 2.0;
        return (op1 == op2) ? 0.0 : 1.0;
    }
'''
    # "<=" and "<" counterparts of the helpers above, using the same result
    # encoding.
    vcomparesL = '''
    static float
    vcleFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 <= op2) ? 0.0 : 1.0;
    }

    static float
    vcltFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (op1 < op2) ? 0.0 : 1.0;
    }
'''
    # Absolute-value compares: same result encoding, applied to fabsf() of
    # each operand.
    vacomparesG = '''
    static float
    vacgtFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
    }

    static float
    vacgeFunc(float op1, float op2)
    {
        if (std::isnan(op1) || std::isnan(op2))
            return 2.0;
        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
    }
'''

    # Only vcompares and vacomparesG are emitted by this statement;
    # vcomparesL is not appended here.
    exec_output += vcompares + vacomparesG

    # C type names grouped by signedness. The "small" tuples stop at 32 bits;
    # the full tuples add the 64 bit type.
    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
    signedTypes = smallSignedTypes + ("int64_t",)
    smallTypes = smallUnsignedTypes + smallSignedTypes
    allTypes = unsignedTypes + signedTypes
1149
    # Generate a three-operand NEON instruction whose two sources and
    # destination all span rCount registers and use the same element type.
    #
    #   name, Name    - forwarded to InstObjParams; Name is also the
    #                   class_name used in the NeonExecDeclare substitutions.
    #   opClass       - stored as "op_class" in the instruction parameters.
    #   types         - element type names; one NeonExecDeclare substitution
    #                   is emitted per entry.
    #   rCount        - number of registers read per source and written to
    #                   the destination.
    #   op            - C++ snippet placed in the per-element loop. It sees
    #                   srcElem1, srcElem2 and destElem, and destElem is
    #                   stored after it runs.
    #   readDest      - also load the destination registers and seed destElem
    #                   from them before op runs.
    #   pairwise      - feed op with adjacent element pairs instead of
    #                   same-index elements (see below).
    #   standardFpcsr - give op a local "fpscr" built by
    #                   fpStandardFPSCRValue() and copy it back to FpscrExc
    #                   afterwards.
    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                          readDest=False, pairwise=False,
                          standardFpcsr=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
        '''
        # Load the source (and optionally destination) registers.
        for reg in range(rCount):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if standardFpcsr:
            eWalkCode += '''
            FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
            '''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        if pairwise:
            # Result element i comes from elements 2i and 2i+1. Indices
            # below eCount are taken from srcReg1; the rest are taken from
            # srcReg2 after subtracting eCount.
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i] :
                                        srcReg2.elements[2 * i - eCount]);
                Element srcElem2 = gtoh(2 * i < eCount ?
                                        srcReg1.elements[2 * i + 1] :
                                        srcReg2.elements[2 * i + 1 - eCount]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
            # Element-wise: result element i comes from element i of each
            # source.
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = gtoh(srcReg1.elements[i]);
                Element srcElem2 = gtoh(srcReg2.elements[i]);
                Element destElem;
                %(readDest)s
                %(op)s
                destReg.elements[i] = htog(destElem);
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        if standardFpcsr:
            eWalkCode += '''
            FpscrExc = fpscr;
            '''
        # Write the result registers back.
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)
1219
    # Generate a three-operand NEON floating point instruction that works on
    # whole registers as floats rather than on packed elements.
    #
    #   name, Name  - forwarded to InstObjParams; Name is also the class_name
    #                 used in the NeonExecDeclare substitutions.
    #   opClass     - stored as "op_class" in the instruction parameters.
    #   types       - one NeonExecDeclare substitution is emitted per entry.
    #   rCount      - number of registers per operand.
    #   op          - C++ snippet placed in the per-register loop. It sees
    #                 srcReg1, srcReg2 and destReg, and destReg is stored
    #                 after it runs.
    #   readDest    - also load the destination registers and seed destReg
    #                 from them before op runs.
    #   pairwise    - feed op with adjacent register pairs instead of
    #                 same-index registers.
    #   toInt       - the destination is a RegVect and destReg is a uint32_t
    #                 instead of a float.
    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
                            readDest=False, pairwise=False, toInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        typedef float FloatVect[rCount];
        FloatVect srcRegs1, srcRegs2;
        '''
        if toInt:
            eWalkCode += 'RegVect destRegs;\n'
        else:
            eWalkCode += 'FloatVect destRegs;\n'
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
            ''' % { "reg" : reg }
            if readDest:
                if toInt:
                    eWalkCode += '''
                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
                    ''' % { "reg" : reg }
                else:
                    eWalkCode += '''
                        destRegs[%(reg)d] = FpDestP%(reg)d;
                    ''' % { "reg" : reg }
        readDestCode = ''
        # NOTE(review): this read-back indexes destRegs[r] even when toInt is
        # set, whereas every other toInt access goes through destRegs.regs[].
        # Confirm that no caller combines readDest with toInt.
        if readDest:
            readDestCode = 'destReg = destRegs[r];'
        destType = 'float'
        writeDest = 'destRegs[r] = destReg;'
        if toInt:
            destType = 'uint32_t'
            writeDest = 'destRegs.regs[r] = destReg;'
        if pairwise:
            # Result register r comes from registers 2r and 2r+1. Indices
            # below rCount are taken from srcRegs1; the rest are taken from
            # srcRegs2 after subtracting rCount.
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = (2 * r < rCount) ?
                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
                float srcReg2 = (2 * r < rCount) ?
                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        else:
            eWalkCode += '''
            for (unsigned r = 0; r < rCount; r++) {
                float srcReg1 = srcRegs1[r];
                float srcReg2 = srcRegs2[r];
                %(destType)s destReg;
                %(readDest)s
                %(op)s
                %(writeDest)s
            }
            ''' % { "op" : op,
                    "readDest" : readDestCode,
                    "destType" : destType,
                    "writeDest" : writeDest }
        # Write back through the _uw view for integer results and through
        # the plain float operand otherwise.
        for reg in range(rCount):
            if toInt:
                eWalkCode += '''
                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
                ''' % { "reg" : reg }
            else:
                eWalkCode += '''
                FpDestP%(reg)d = destRegs[%(reg)d];
                ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "FpRegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)
1304
    # Generate a three-operand NEON instruction whose operands may differ in
    # width. Each of bigSrc1, bigSrc2 and bigDest switches the corresponding
    # operand from a 2-register RegVect to a 4-register BigRegVect.
    #
    #   name, Name  - forwarded to InstObjParams; Name is also the class_name
    #                 used in the NeonExecDeclare substitutions.
    #   opClass     - stored as "op_class" in the instruction parameters.
    #   types       - one NeonExecDeclare substitution is emitted per entry.
    #   op          - C++ snippet placed in the per-element loop. It sees
    #                 srcElem1, srcElem2 and destElem, and destElem is stored
    #                 after it runs.
    #   readDest    - also load the destination registers and seed destElem
    #                 from them before op runs.
    def threeUnequalRegInst(name, Name, opClass, types, op,
                            bigSrc1, bigSrc2, bigDest, readDest):
        global header_output, exec_output
        # Default to 2 registers per operand; "big" operands use 4 and the
        # Big-prefixed vector and element types.
        src1Cnt = src2Cnt = destCnt = 2
        src1Prefix = src2Prefix = destPrefix = ''
        if bigSrc1:
            src1Cnt = 4
            src1Prefix = 'Big'
        if bigSrc2:
            src2Cnt = 4
            src2Prefix = 'Big'
        if bigDest:
            destCnt = 4
            destPrefix = 'Big'
        eWalkCode = simdEnabledCheckCode + '''
            %sRegVect srcReg1;
            %sRegVect srcReg2;
            %sRegVect destReg;
        ''' % (src1Prefix, src2Prefix, destPrefix)
        for reg in range(src1Cnt):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            ''' % { "reg" : reg }
        for reg in range(src2Cnt):
            eWalkCode += '''
                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(destCnt):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        # NOTE(review): srcElem2 is declared with src1Prefix, and the
        # "src2Prefix" entry of the substitution dict is never consumed by
        # the template. When bigSrc1 and bigSrc2 differ, srcElem2 therefore
        # takes the first source's element width. Confirm whether this
        # widening is intentional before changing it.
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
            %(destPrefix)sElement destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode,
                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
                "destPrefix" : destPrefix }
        for reg in range(destCnt):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        # r_count is fixed at 2 regardless of which operands are "big".
        iop = InstObjParams(name, Name,
                            "RegRegRegOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegRegOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)
1368
1369    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1370        threeUnequalRegInst(name, Name, opClass, types, op,
1371                            True, True, False, readDest)
1372
1373    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1374        threeUnequalRegInst(name, Name, opClass, types, op,
1375                            False, False, True, readDest)
1376
1377    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1378        threeUnequalRegInst(name, Name, opClass, types, op,
1379                            True, False, True, readDest)
1380
1381    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1382        global header_output, exec_output
1383        eWalkCode = simdEnabledCheckCode + '''
1384        RegVect srcReg1, srcReg2, destReg;
1385        '''
1386        for reg in range(rCount):
1387            eWalkCode += '''
1388                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1389                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1390            ''' % { "reg" : reg }
1391            if readDest:
1392                eWalkCode += '''
1393                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1394                ''' % { "reg" : reg }
1395        readDestCode = ''
1396        if readDest:
1397            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1398        eWalkCode += '''
1399        if (imm < 0 && imm >= eCount) {
1400            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1401                                                           mnemonic);
1402        } else {
1403            for (unsigned i = 0; i < eCount; i++) {
1404                Element srcElem1 = gtoh(srcReg1.elements[i]);
1405                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1406                Element destElem;
1407                %(readDest)s
1408                %(op)s
1409                destReg.elements[i] = htog(destElem);
1410            }
1411        }
1412        ''' % { "op" : op, "readDest" : readDestCode }
1413        for reg in range(rCount):
1414            eWalkCode += '''
1415            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1416            ''' % { "reg" : reg }
1417        iop = InstObjParams(name, Name,
1418                            "RegRegRegImmOp",
1419                            { "code": eWalkCode,
1420                              "r_count": rCount,
1421                              "predicate_test": predicateTest,
1422                              "op_class": opClass }, [])
1423        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1424        exec_output += NeonEqualRegExecute.subst(iop)
1425        for type in types:
1426            substDict = { "targs" : type,
1427                          "class_name" : Name }
1428            exec_output += NeonExecDeclare.subst(substDict)
1429
1430    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1431        global header_output, exec_output
1432        rCount = 2
1433        eWalkCode = simdEnabledCheckCode + '''
1434        RegVect srcReg1, srcReg2;
1435        BigRegVect destReg;
1436        '''
1437        for reg in range(rCount):
1438            eWalkCode += '''
1439                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1440                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1441            ''' % { "reg" : reg }
1442        if readDest:
1443            for reg in range(2 * rCount):
1444                eWalkCode += '''
1445                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1446                ''' % { "reg" : reg }
1447        readDestCode = ''
1448        if readDest:
1449            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1450        eWalkCode += '''
1451        if (imm < 0 && imm >= eCount) {
1452            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1453                                                          mnemonic);
1454        } else {
1455            for (unsigned i = 0; i < eCount; i++) {
1456                Element srcElem1 = gtoh(srcReg1.elements[i]);
1457                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1458                BigElement destElem;
1459                %(readDest)s
1460                %(op)s
1461                destReg.elements[i] = htog(destElem);
1462            }
1463        }
1464        ''' % { "op" : op, "readDest" : readDestCode }
1465        for reg in range(2 * rCount):
1466            eWalkCode += '''
1467            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1468            ''' % { "reg" : reg }
1469        iop = InstObjParams(name, Name,
1470                            "RegRegRegImmOp",
1471                            { "code": eWalkCode,
1472                              "r_count": rCount,
1473                              "predicate_test": predicateTest,
1474                              "op_class": opClass }, [])
1475        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1476        exec_output += NeonUnequalRegExecute.subst(iop)
1477        for type in types:
1478            substDict = { "targs" : type,
1479                          "class_name" : Name }
1480            exec_output += NeonExecDeclare.subst(substDict)
1481
1482    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1483        global header_output, exec_output
1484        eWalkCode = simdEnabledCheckCode + '''
1485        typedef float FloatVect[rCount];
1486        FloatVect srcRegs1, srcRegs2, destRegs;
1487        '''
1488        for reg in range(rCount):
1489            eWalkCode += '''
1490                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1491                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1492            ''' % { "reg" : reg }
1493            if readDest:
1494                eWalkCode += '''
1495                    destRegs[%(reg)d] = FpDestP%(reg)d;
1496                ''' % { "reg" : reg }
1497        readDestCode = ''
1498        if readDest:
1499            readDestCode = 'destReg = destRegs[i];'
1500        eWalkCode += '''
1501        if (imm < 0 && imm >= eCount) {
1502            fault = std::make_shared<UndefinedInstruction>(machInst, false,
1503                                                           mnemonic);
1504        } else {
1505            for (unsigned i = 0; i < rCount; i++) {
1506                float srcReg1 = srcRegs1[i];
1507                float srcReg2 = srcRegs2[imm];
1508                float destReg;
1509                %(readDest)s
1510                %(op)s
1511                destRegs[i] = destReg;
1512            }
1513        }
1514        ''' % { "op" : op, "readDest" : readDestCode }
1515        for reg in range(rCount):
1516            eWalkCode += '''
1517            FpDestP%(reg)d = destRegs[%(reg)d];
1518            ''' % { "reg" : reg }
1519        iop = InstObjParams(name, Name,
1520                            "FpRegRegRegImmOp",
1521                            { "code": eWalkCode,
1522                              "r_count": rCount,
1523                              "predicate_test": predicateTest,
1524                              "op_class": opClass }, [])
1525        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1526        exec_output += NeonEqualRegExecute.subst(iop)
1527        for type in types:
1528            substDict = { "targs" : type,
1529                          "class_name" : Name }
1530            exec_output += NeonExecDeclare.subst(substDict)
1531
    # Generate a two-register-plus-immediate NEON instruction ("RegRegImmOp")
    # with one source and a destination that both span rCount registers.
    #
    #   name, Name  - forwarded to InstObjParams; Name is also the class_name
    #                 used in the NeonExecDeclare substitutions.
    #   opClass     - stored as "op_class" in the instruction parameters.
    #   types       - one NeonExecDeclare substitution is emitted per entry.
    #   rCount      - number of registers per operand.
    #   op          - C++ snippet placed in the loop body. Which variables it
    #                 sees depends on toInt and fromInt (see below).
    #   readDest    - also load the destination registers and seed the
    #                 destination variable from them before op runs.
    #   toInt       - the destination is handled as a whole uint32_t register
    #                 ("destReg") instead of an Element ("destElem").
    #   fromInt     - the source is handled as a whole uint32_t register
    #                 ("srcReg1") instead of an Element ("srcElem1").
    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
            readDest=False, toInt=False, fromInt=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcRegs1, destRegs;
        '''
        for reg in range(rCount):
            eWalkCode += '''
                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            ''' % { "reg" : reg }
            if readDest:
                eWalkCode += '''
                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        # Pick the element-wise or register-wise snippets for reading the
        # source and for declaring, seeding and storing the destination.
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
            if toInt:
                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
        if fromInt:
            readOpCode = 'uint32_t srcReg1 = gtoh(srcRegs1.regs[i]);'
        declDest = 'Element destElem;'
        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
        if toInt:
            declDest = 'uint32_t destReg;'
            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
        # The loop always runs to eCount, including when regs[i] is indexed.
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(readOp)s
            %(declDest)s
            %(readDest)s
            %(op)s
            %(writeDest)s
        }
        ''' % { "readOp" : readOpCode,
                "declDest" : declDest,
                "readDest" : readDestCode,
                "op" : op,
                "writeDest" : writeDestCode }
        for reg in range(rCount):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": rCount,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonEqualRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)
1588
    # Generate a narrowing two-register-plus-immediate NEON instruction: the
    # source is a 4-register BigRegVect read as BigElements, the destination
    # a 2-register RegVect written as Elements.
    #
    #   name, Name  - forwarded to InstObjParams; Name is also the class_name
    #                 used in the NeonExecDeclare substitutions.
    #   opClass     - stored as "op_class" in the instruction parameters.
    #   types       - one NeonExecDeclare substitution is emitted per entry.
    #   op          - C++ snippet placed in the per-element loop. It sees
    #                 srcElem1 and destElem, and destElem is stored after it
    #                 runs.
    #   readDest    - also load the destination registers and seed destElem
    #                 from them before op runs.
    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        BigRegVect srcReg1;
        RegVect destReg;
        '''
        # Four source registers in, two destination registers out.
        for reg in range(4):
            eWalkCode += '''
                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
            ''' % { "reg" : reg }
        if readDest:
            for reg in range(2):
                eWalkCode += '''
                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
                ''' % { "reg" : reg }
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
        eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
            Element destElem;
            %(readDest)s
            %(op)s
            destReg.elements[i] = htog(destElem);
        }
        ''' % { "op" : op, "readDest" : readDestCode }
        for reg in range(2):
            eWalkCode += '''
            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
            ''' % { "reg" : reg }
        iop = InstObjParams(name, Name,
                            "RegRegImmOp",
                            { "code": eWalkCode,
                              "r_count": 2,
                              "predicate_test": predicateTest,
                              "op_class": opClass }, [])
        header_output += NeonRegRegImmOpDeclare.subst(iop)
        exec_output += NeonUnequalRegExecute.subst(iop)
        for type in types:
            substDict = { "targs" : type,
                          "class_name" : Name }
            exec_output += NeonExecDeclare.subst(substDict)
1632
1633    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1634        global header_output, exec_output
1635        eWalkCode = simdEnabledCheckCode + '''
1636        RegVect srcReg1;
1637        BigRegVect destReg;
1638        '''
1639        for reg in range(2):
1640            eWalkCode += '''
1641                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1642            ''' % { "reg" : reg }
1643        if readDest:
1644            for reg in range(4):
1645                eWalkCode += '''
1646                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1647                ''' % { "reg" : reg }
1648        readDestCode = ''
1649        if readDest:
1650            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1651        eWalkCode += '''
1652        for (unsigned i = 0; i < eCount; i++) {
1653            Element srcElem1 = gtoh(srcReg1.elements[i]);
1654            BigElement destElem;
1655            %(readDest)s
1656            %(op)s
1657            destReg.elements[i] = htog(destElem);
1658        }
1659        ''' % { "op" : op, "readDest" : readDestCode }
1660        for reg in range(4):
1661            eWalkCode += '''
1662            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1663            ''' % { "reg" : reg }
1664        iop = InstObjParams(name, Name,
1665                            "RegRegImmOp",
1666                            { "code": eWalkCode,
1667                              "r_count": 2,
1668                              "predicate_test": predicateTest,
1669                              "op_class": opClass }, [])
1670        header_output += NeonRegRegImmOpDeclare.subst(iop)
1671        exec_output += NeonUnequalRegExecute.subst(iop)
1672        for type in types:
1673            substDict = { "targs" : type,
1674                          "class_name" : Name }
1675            exec_output += NeonExecDeclare.subst(substDict)
1676
1677    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1678        global header_output, exec_output
1679        eWalkCode = simdEnabledCheckCode + '''
1680        RegVect srcReg1, destReg;
1681        '''
1682        for reg in range(rCount):
1683            eWalkCode += '''
1684                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1685            ''' % { "reg" : reg }
1686            if readDest:
1687                eWalkCode += '''
1688                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1689                ''' % { "reg" : reg }
1690        readDestCode = ''
1691        if readDest:
1692            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1693        eWalkCode += '''
1694        for (unsigned i = 0; i < eCount; i++) {
1695            unsigned j = i;
1696            Element srcElem1 = gtoh(srcReg1.elements[i]);
1697            Element destElem;
1698            %(readDest)s
1699            %(op)s
1700            destReg.elements[j] = htog(destElem);
1701        }
1702        ''' % { "op" : op, "readDest" : readDestCode }
1703        for reg in range(rCount):
1704            eWalkCode += '''
1705            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1706            ''' % { "reg" : reg }
1707        iop = InstObjParams(name, Name,
1708                            "RegRegOp",
1709                            { "code": eWalkCode,
1710                              "r_count": rCount,
1711                              "predicate_test": predicateTest,
1712                              "op_class": opClass }, [])
1713        header_output += NeonRegRegOpDeclare.subst(iop)
1714        exec_output += NeonEqualRegExecute.subst(iop)
1715        for type in types:
1716            substDict = { "targs" : type,
1717                          "class_name" : Name }
1718            exec_output += NeonExecDeclare.subst(substDict)
1719
1720    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1721        global header_output, exec_output
1722        eWalkCode = simdEnabledCheckCode + '''
1723        RegVect srcReg1, destReg;
1724        '''
1725        for reg in range(rCount):
1726            eWalkCode += '''
1727                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1728            ''' % { "reg" : reg }
1729            if readDest:
1730                eWalkCode += '''
1731                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1732                ''' % { "reg" : reg }
1733        readDestCode = ''
1734        if readDest:
1735            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1736        eWalkCode += '''
1737        for (unsigned i = 0; i < eCount; i++) {
1738            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1739            Element destElem;
1740            %(readDest)s
1741            %(op)s
1742            destReg.elements[i] = htog(destElem);
1743        }
1744        ''' % { "op" : op, "readDest" : readDestCode }
1745        for reg in range(rCount):
1746            eWalkCode += '''
1747            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1748            ''' % { "reg" : reg }
1749        iop = InstObjParams(name, Name,
1750                            "RegRegImmOp",
1751                            { "code": eWalkCode,
1752                              "r_count": rCount,
1753                              "predicate_test": predicateTest,
1754                              "op_class": opClass }, [])
1755        header_output += NeonRegRegImmOpDeclare.subst(iop)
1756        exec_output += NeonEqualRegExecute.subst(iop)
1757        for type in types:
1758            substDict = { "targs" : type,
1759                          "class_name" : Name }
1760            exec_output += NeonExecDeclare.subst(substDict)
1761
1762    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1763        global header_output, exec_output
1764        eWalkCode = simdEnabledCheckCode + '''
1765        RegVect srcReg1, destReg;
1766        '''
1767        for reg in range(rCount):
1768            eWalkCode += '''
1769                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1770                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1771            ''' % { "reg" : reg }
1772            if readDest:
1773                eWalkCode += '''
1774                ''' % { "reg" : reg }
1775        readDestCode = ''
1776        if readDest:
1777            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1778        eWalkCode += op
1779        for reg in range(rCount):
1780            eWalkCode += '''
1781            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1782            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1783            ''' % { "reg" : reg }
1784        iop = InstObjParams(name, Name,
1785                            "RegRegOp",
1786                            { "code": eWalkCode,
1787                              "r_count": rCount,
1788                              "predicate_test": predicateTest,
1789                              "op_class": opClass }, [])
1790        header_output += NeonRegRegOpDeclare.subst(iop)
1791        exec_output += NeonEqualRegExecute.subst(iop)
1792        for type in types:
1793            substDict = { "targs" : type,
1794                          "class_name" : Name }
1795            exec_output += NeonExecDeclare.subst(substDict)
1796
1797    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1798            readDest=False, toInt=False):
1799        global header_output, exec_output
1800        eWalkCode = simdEnabledCheckCode + '''
1801        typedef float FloatVect[rCount];
1802        FloatVect srcRegs1;
1803        '''
1804        if toInt:
1805            eWalkCode += 'RegVect destRegs;\n'
1806        else:
1807            eWalkCode += 'FloatVect destRegs;\n'
1808        for reg in range(rCount):
1809            eWalkCode += '''
1810                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1811            ''' % { "reg" : reg }
1812            if readDest:
1813                if toInt:
1814                    eWalkCode += '''
1815                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1816                    ''' % { "reg" : reg }
1817                else:
1818                    eWalkCode += '''
1819                        destRegs[%(reg)d] = FpDestP%(reg)d;
1820                    ''' % { "reg" : reg }
1821        readDestCode = ''
1822        if readDest:
1823            readDestCode = 'destReg = destRegs[i];'
1824        destType = 'float'
1825        writeDest = 'destRegs[r] = destReg;'
1826        if toInt:
1827            destType = 'uint32_t'
1828            writeDest = 'destRegs.regs[r] = destReg;'
1829        eWalkCode += '''
1830        for (unsigned r = 0; r < rCount; r++) {
1831            float srcReg1 = srcRegs1[r];
1832            %(destType)s destReg;
1833            %(readDest)s
1834            %(op)s
1835            %(writeDest)s
1836        }
1837        ''' % { "op" : op,
1838                "readDest" : readDestCode,
1839                "destType" : destType,
1840                "writeDest" : writeDest }
1841        for reg in range(rCount):
1842            if toInt:
1843                eWalkCode += '''
1844                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1845                ''' % { "reg" : reg }
1846            else:
1847                eWalkCode += '''
1848                FpDestP%(reg)d = destRegs[%(reg)d];
1849                ''' % { "reg" : reg }
1850        iop = InstObjParams(name, Name,
1851                            "FpRegRegOp",
1852                            { "code": eWalkCode,
1853                              "r_count": rCount,
1854                              "predicate_test": predicateTest,
1855                              "op_class": opClass }, [])
1856        header_output += NeonRegRegOpDeclare.subst(iop)
1857        exec_output += NeonEqualRegExecute.subst(iop)
1858        for type in types:
1859            substDict = { "targs" : type,
1860                          "class_name" : Name }
1861            exec_output += NeonExecDeclare.subst(substDict)
1862
1863    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1864        global header_output, exec_output
1865        eWalkCode = simdEnabledCheckCode + '''
1866        RegVect srcRegs;
1867        BigRegVect destReg;
1868        '''
1869        for reg in range(rCount):
1870            eWalkCode += '''
1871                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1872            ''' % { "reg" : reg }
1873            if readDest:
1874                eWalkCode += '''
1875                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1876                ''' % { "reg" : reg }
1877        readDestCode = ''
1878        if readDest:
1879            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1880        eWalkCode += '''
1881        for (unsigned i = 0; i < eCount / 2; i++) {
1882            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1883            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1884            BigElement destElem;
1885            %(readDest)s
1886            %(op)s
1887            destReg.elements[i] = htog(destElem);
1888        }
1889        ''' % { "op" : op, "readDest" : readDestCode }
1890        for reg in range(rCount):
1891            eWalkCode += '''
1892            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1893            ''' % { "reg" : reg }
1894        iop = InstObjParams(name, Name,
1895                            "RegRegOp",
1896                            { "code": eWalkCode,
1897                              "r_count": rCount,
1898                              "predicate_test": predicateTest,
1899                              "op_class": opClass }, [])
1900        header_output += NeonRegRegOpDeclare.subst(iop)
1901        exec_output += NeonUnequalRegExecute.subst(iop)
1902        for type in types:
1903            substDict = { "targs" : type,
1904                          "class_name" : Name }
1905            exec_output += NeonExecDeclare.subst(substDict)
1906
1907    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1908        global header_output, exec_output
1909        eWalkCode = simdEnabledCheckCode + '''
1910        BigRegVect srcReg1;
1911        RegVect destReg;
1912        '''
1913        for reg in range(4):
1914            eWalkCode += '''
1915                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1916            ''' % { "reg" : reg }
1917        if readDest:
1918            for reg in range(2):
1919                eWalkCode += '''
1920                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1921                ''' % { "reg" : reg }
1922        readDestCode = ''
1923        if readDest:
1924            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1925        eWalkCode += '''
1926        for (unsigned i = 0; i < eCount; i++) {
1927            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1928            Element destElem;
1929            %(readDest)s
1930            %(op)s
1931            destReg.elements[i] = htog(destElem);
1932        }
1933        ''' % { "op" : op, "readDest" : readDestCode }
1934        for reg in range(2):
1935            eWalkCode += '''
1936            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1937            ''' % { "reg" : reg }
1938        iop = InstObjParams(name, Name,
1939                            "RegRegOp",
1940                            { "code": eWalkCode,
1941                              "r_count": 2,
1942                              "predicate_test": predicateTest,
1943                              "op_class": opClass }, [])
1944        header_output += NeonRegRegOpDeclare.subst(iop)
1945        exec_output += NeonUnequalRegExecute.subst(iop)
1946        for type in types:
1947            substDict = { "targs" : type,
1948                          "class_name" : Name }
1949            exec_output += NeonExecDeclare.subst(substDict)
1950
1951    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1952        global header_output, exec_output
1953        eWalkCode = simdEnabledCheckCode + '''
1954        RegVect destReg;
1955        '''
1956        if readDest:
1957            for reg in range(rCount):
1958                eWalkCode += '''
1959                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1960                ''' % { "reg" : reg }
1961        readDestCode = ''
1962        if readDest:
1963            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1964        eWalkCode += '''
1965        for (unsigned i = 0; i < eCount; i++) {
1966            Element destElem;
1967            %(readDest)s
1968            %(op)s
1969            destReg.elements[i] = htog(destElem);
1970        }
1971        ''' % { "op" : op, "readDest" : readDestCode }
1972        for reg in range(rCount):
1973            eWalkCode += '''
1974            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1975            ''' % { "reg" : reg }
1976        iop = InstObjParams(name, Name,
1977                            "RegImmOp",
1978                            { "code": eWalkCode,
1979                              "r_count": rCount,
1980                              "predicate_test": predicateTest,
1981                              "op_class": opClass }, [])
1982        header_output += NeonRegImmOpDeclare.subst(iop)
1983        exec_output += NeonEqualRegExecute.subst(iop)
1984        for type in types:
1985            substDict = { "targs" : type,
1986                          "class_name" : Name }
1987            exec_output += NeonExecDeclare.subst(substDict)
1988
1989    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1990        global header_output, exec_output
1991        eWalkCode = simdEnabledCheckCode + '''
1992        RegVect srcReg1;
1993        BigRegVect destReg;
1994        '''
1995        for reg in range(2):
1996            eWalkCode += '''
1997                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1998            ''' % { "reg" : reg }
1999        if readDest:
2000            for reg in range(4):
2001                eWalkCode += '''
2002                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
2003                ''' % { "reg" : reg }
2004        readDestCode = ''
2005        if readDest:
2006            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
2007        eWalkCode += '''
2008        for (unsigned i = 0; i < eCount; i++) {
2009            Element srcElem1 = gtoh(srcReg1.elements[i]);
2010            BigElement destElem;
2011            %(readDest)s
2012            %(op)s
2013            destReg.elements[i] = htog(destElem);
2014        }
2015        ''' % { "op" : op, "readDest" : readDestCode }
2016        for reg in range(4):
2017            eWalkCode += '''
2018            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
2019            ''' % { "reg" : reg }
2020        iop = InstObjParams(name, Name,
2021                            "RegRegOp",
2022                            { "code": eWalkCode,
2023                              "r_count": 2,
2024                              "predicate_test": predicateTest,
2025                              "op_class": opClass }, [])
2026        header_output += NeonRegRegOpDeclare.subst(iop)
2027        exec_output += NeonUnequalRegExecute.subst(iop)
2028        for type in types:
2029            substDict = { "targs" : type,
2030                          "class_name" : Name }
2031            exec_output += NeonExecDeclare.subst(substDict)
2032
    # Halving add/subtract family.  Each snippet is registered twice: a ...D
    # class with 2 and a ...Q class with 4 as the fifth argument (presumably
    # the register-word count of threeEqualRegInst, whose definition is
    # outside this excerpt).

    # vhadd: the low bits of the two inputs are added on their own to get the
    # carry out of bit 0; the inputs are then halved with their low bit
    # cleared, summed, and the carry is added.
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # vrhadd: identical to vhadd except that 1 is added to the low-bit sum
    # before it is shifted, which rounds the halved result up.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # vhsub: the borrow (spelled `barrowBit` in the snippet) is 1 only when
    # srcElem1's low bit is clear and srcElem2's is set; it is subtracted from
    # the difference of the halved inputs.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2070
    # Bitwise operations.  All of them are registered for unsignedTypes only,
    # once as a ...D class (fifth argument 2) and once as a ...Q class (4).

    # vand: bitwise AND of the two sources.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    # vbic: srcElem1 with every bit that is set in srcElem2 cleared.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    # vorr: bitwise OR of the two sources.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov is generated from the same OR snippet as vorr; only the mnemonic,
    # class name and op class ("SimdMiscOp") differ.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    # vorn: srcElem1 ORed with the complement of srcElem2.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    # veor: bitwise exclusive OR of the two sources.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # The three selects below read destElem as an input, so they pass True as
    # the trailing argument (presumably readDest -- threeEqualRegInst is
    # defined outside this excerpt).

    # vbif: keep the destination bit where srcElem2 is set, otherwise take the
    # bit from srcElem1.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # vbit: take the bit from srcElem1 where srcElem2 is set, otherwise keep
    # the destination bit.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # vbsl: the old destination acts as the mask -- srcElem1 where it is set,
    # srcElem2 where it is clear.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2119
    # vmax: the larger of the two sources (srcElem2 when they are equal).
    vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

    # vmin: the smaller of the two sources (srcElem2 when they are equal).
    vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2131
    # Plain add/subtract and their long, wide and narrowing variants.

    # vadd: element-wise sum; the generated classes are named NVaddD/NVaddQ.
    vaddCode = '''
        destElem = srcElem1 + srcElem2;
    '''
    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

    # vpadd reuses the vadd snippet with pairwise=True and is only registered
    # in the D form.
    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                      2, vaddCode, pairwise=True)
    # vaddl / vaddw share one snippet: both operands are cast to BigElement
    # before they are added.
    vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # vaddhn: the BigElement sum is shifted right by the bit width of Element
    # before it is stored.
    vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # vraddhn: as vaddhn, but 1 << (bit width of Element - 1) is added first
    # so the discarded low half rounds the result.
    vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)

    # vsub: element-wise difference; the generated classes are named
    # NVsubD/NVsubQ.
    vsubCode = '''
        destElem = srcElem1 - srcElem2;
    '''
    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # vsubl / vsubw share one snippet: both operands are cast to BigElement
    # before they are subtracted.
    vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    '''
    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)
2167
    # Saturating add/subtract (with the narrowing subtracts defined in
    # between).  The saturating snippets read FpscrQc into a local FPSCR, set
    # its qc field when they clamp, and write it back unconditionally.

    # vqadd (unsigned): if the sum is smaller than either input it wrapped, so
    # the result is replaced with all ones.
    vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # vsubhn: the BigElement difference is shifted right by the bit width of
    # Element before it is stored.
    vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # vrsubhn: as vsubhn, but 1 << (bit width of Element - 1) is added first
    # so the discarded low half rounds the result.
    vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)

    # vqadd (signed): overflow is detected when both inputs have the same sign
    # and the sum has the other one; the result is then clamped to the
    # Element maximum or minimum.
    vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            if (negDest)
                /* If (>=0) plus (>=0) yields (<0), saturate to +. */
                destElem = std::numeric_limits<Element>::max();
            else
                /* If (<0) plus (<0) yields (>=0), saturate to -. */
                destElem = std::numeric_limits<Element>::min();
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

    # vqsub (unsigned): a difference larger than srcElem1 means the
    # subtraction wrapped, so the result is clamped to 0.
    vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

    # vqsub (signed): overflow is detected when the inputs have different
    # signs and the difference does not have srcElem1's sign; the result is
    # then clamped to the Element maximum or minimum.
    vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            if (negDest)
                /* If (>=0) minus (<0) yields (<0), saturate to +. */
                destElem = std::numeric_limits<Element>::max();
            else
                /* If (<0) minus (>=0) yields (>=0), saturate to -. */
                destElem = std::numeric_limits<Element>::min();
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2242
    # Element-wise comparisons.  Each one yields all ones ((Element)(-1)) when
    # the comparison holds and 0 otherwise.

    # vcgt: srcElem1 greater than srcElem2.
    vcgtCode = '''
        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

    # vcge: srcElem1 greater than or equal to srcElem2.
    vcgeCode = '''
        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

    # vceq: equality; registered for unsignedTypes only.
    vceqCode = '''
        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2260
    # Register-controlled shifts.  In every snippet the shift amount is
    # srcElem2 truncated to a signed 8-bit value: a negative amount shifts
    # right by its magnitude, a non-negative one shifts left.  ltz(), bits()
    # and mask() are helpers defined outside this excerpt.

    # vshl: plain shift.  A right shift by the element width or more starts
    # from 0, with the amount capped at (width - 1) for the fix-up; the
    # fix-up then ORs in the upper bits whenever ltz(srcElem1) holds but the
    # shifted value does not look negative.  A left shift by the element
    # width or more gives 0.
    vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    '''
    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

    # vrshl: as vshl, plus a rounding bit on right shifts.  rBit is the last
    # bit shifted out (bit shiftAmt - 1 of the source) while the amount does
    # not exceed the element width, and is forced to 1 for larger amounts
    # when ltz(srcElem1) holds; it is added after the sign fix-up.  A shift
    # amount of 0 copies the source unchanged.  Note that this one is
    # registered with "SimdAluOp" whereas vshl uses "SimdShiftOp".
    vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
    '''
    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)

    # vqshl (unsigned): right shifts behave like vshl without the sign fix-up.
    # On a left shift the result is replaced with mask(element width) and
    # fpscr.qc is set when any of the bits that would be shifted out are set
    # (or, for amounts of the element width or more, when the source is
    # non-zero).  FpscrQc is written back unconditionally.
    vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)
2356
    # vqshl, signed element types, shift amount taken from the low byte of
    # srcElem2 (as int8_t).
    #   * amount < 0: right shift with an explicit sign-extension fix-up, so a
    #     negative input shifted by the element width or more yields all ones.
    #   * amount > 0: left shift. The top (amount + 1) bits of srcElem1 must be
    #     all ones for a negative input or all zeros otherwise; if not (or the
    #     amount is at least the element width and srcElem1 is non-zero) the
    #     result saturates to mask(width - 1), bit-inverted for negative
    #     inputs, and fpscr.qc is set.
    #   * amount == 0: copy srcElem1.
    # NOTE(review): registered with op class "SimdCmpOp" while the unsigned
    # variant uses "SimdAluOp" -- confirm this difference is intentional.
    vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2402
2403    vqrshlUCode = '''
2404        int16_t shiftAmt = (int8_t)srcElem2;
2405        FPSCR fpscr = (FPSCR) FpscrQc;
2406        if (shiftAmt < 0) {
2407            shiftAmt = -shiftAmt;
2408            Element rBit = 0;
2409            if (shiftAmt <= sizeof(Element) * 8)
2410                rBit = bits(srcElem1, shiftAmt - 1);
2411            if (shiftAmt >= sizeof(Element) * 8) {
2412                shiftAmt = sizeof(Element) * 8 - 1;
2413                destElem = 0;
2414            } else {
2415                destElem = (srcElem1 >> shiftAmt);
2416            }
2417            destElem += rBit;
2418        } else {
2419            if (shiftAmt >= sizeof(Element) * 8) {
2420                if (srcElem1 != 0) {
2421                    destElem = mask(sizeof(Element) * 8);
2422                    fpscr.qc = 1;
2423                } else {
2424                    destElem = 0;
2425                }
2426            } else {
2427                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2428                            sizeof(Element) * 8 - shiftAmt)) {
2429                    destElem = mask(sizeof(Element) * 8);
2430                    fpscr.qc = 1;
2431                } else {
2432                    destElem = srcElem1 << shiftAmt;
2433                }
2434            }
2435        }
2436        FpscrQc = fpscr;
2437    '''
2438    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2439    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2440
    # vqrshl, signed element types, shift amount taken from the low byte of
    # srcElem2 (as int8_t).
    #   * amount < 0: right shift with sign-extension fix-up, then add rBit.
    #     rBit is the last bit shifted out, or 1 for a negative input when
    #     -amount exceeds the element width (which cancels the all-ones value
    #     produced by the fix-up).
    #   * amount > 0: left shift with the same saturation test as the
    #     non-rounding signed variant: the top (amount + 1) bits must all
    #     equal the sign, otherwise the result saturates and fpscr.qc is set.
    #   * amount == 0: copy srcElem1.
    vqrshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2492
    # vaba: add |srcElem1 - srcElem2| to destElem. The larger operand is
    # always the minuend, so the difference is computed without negation.
    # The trailing True is presumably the "read destination" flag, since the
    # snippet accumulates into destElem -- confirm against the helper.
    vabaCode = '''
        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                            (srcElem2 - srcElem1);
    '''
    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
    # vabal: same accumulation, with both operands cast to BigElement before
    # subtracting.
    vabalCode = '''
        destElem += (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2505
    # vabd: destElem = |srcElem1 - srcElem2|, computed by subtracting the
    # smaller operand from the larger one.
    vabdCode = '''
        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
                                           (srcElem2 - srcElem1);
    '''
    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
    # vabdl: same absolute difference with both operands cast to BigElement
    # before subtracting.
    vabdlCode = '''
        destElem = (srcElem1 > srcElem2) ?
            ((BigElement)srcElem1 - (BigElement)srcElem2) :
            ((BigElement)srcElem2 - (BigElement)srcElem1);
    '''
    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2518
    # vtst: destElem is all ones when srcElem1 and srcElem2 share at least one
    # set bit, otherwise 0.
    vtstCode = '''
        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2524
    # vmul: element-wise product at element width.
    vmulCode = '''
        destElem = srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
    # vmull: product of the operands after casting each to BigElement.
    vmullCode = '''
        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2534
    # vmla: multiply-accumulate, destElem += srcElem1 * srcElem2.
    vmlaCode = '''
        destElem = destElem + srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
    # vmlal: same accumulation with the operands cast to BigElement before
    # multiplying.
    vmlalCode = '''
        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2544
2545    vqdmlalCode = '''
2546        FPSCR fpscr = (FPSCR) FpscrQc;
2547        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2548        Element maxNeg = std::numeric_limits<Element>::min();
2549        Element halfNeg = maxNeg / 2;
2550        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2551            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2552            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2553            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2554            fpscr.qc = 1;
2555        }
2556        bool negPreDest = ltz(destElem);
2557        destElem += midElem;
2558        bool negDest = ltz(destElem);
2559        bool negMid = ltz(midElem);
2560        if (negPreDest == negMid && negMid != negDest) {
2561            destElem = mask(sizeof(BigElement) * 8 - 1);
2562            if (negPreDest)
2563                destElem = ~destElem;
2564            fpscr.qc = 1;
2565        }
2566        FpscrQc = fpscr;
2567    '''
2568    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2569
2570    vqdmlslCode = '''
2571        FPSCR fpscr = (FPSCR) FpscrQc;
2572        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2573        Element maxNeg = std::numeric_limits<Element>::min();
2574        Element halfNeg = maxNeg / 2;
2575        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2576            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2577            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2578            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2579            fpscr.qc = 1;
2580        }
2581        bool negPreDest = ltz(destElem);
2582        destElem -= midElem;
2583        bool negDest = ltz(destElem);
2584        bool posMid = ltz((BigElement)-midElem);
2585        if (negPreDest == posMid && posMid != negDest) {
2586            destElem = mask(sizeof(BigElement) * 8 - 1);
2587            if (negPreDest)
2588                destElem = ~destElem;
2589            fpscr.qc = 1;
2590        }
2591        FpscrQc = fpscr;
2592    '''
2593    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2594
    # vqdmull: destElem = 2 * srcElem1 * srcElem2. When both operands equal
    # the minimum Element value the result is replaced by
    # ~(srcElem1 << element width) and fpscr.qc is set.
    # NOTE(review): this non-accumulating form is registered with op class
    # "SimdMultAccOp", while the two-register form later in the file uses
    # "SimdMultOp" -- confirm which is intended.
    vqdmullCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2606
    # vmls: multiply-subtract, destElem -= srcElem1 * srcElem2.
    vmlsCode = '''
        destElem = destElem - srcElem1 * srcElem2;
    '''
    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
    # vmlsl: same subtraction with the operands cast to BigElement before
    # multiplying.
    vmlslCode = '''
        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
    '''
    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2616
    # Polynomial (carry-less) multiply: for every set bit j of srcElem2,
    # XOR srcElem1 << j into destElem. Bits shifted above the element width
    # are lost in this same-width form.
    vmulpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= srcElem1 << j;
        }
    '''
    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
    # Long polynomial multiply: same loop, but srcElem1 is cast to BigElement
    # before shifting, so the shifted-out bits are kept.
    vmullpCode = '''
        destElem = 0;
        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
            if (bits(srcElem2, j))
                destElem ^= (BigElement)srcElem1 << j;
        }
    '''
    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2634
    # Pairwise integer max/min, D form only. These reuse the vmaxCode and
    # vminCode snippets assigned earlier in the file, with pairwise=True.
    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)

    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2638
    # vqdmulh: destElem is the doubled 64-bit product shifted right by the
    # element width, i.e. its high half. When both operands equal the minimum
    # Element value, destElem is set to ~srcElem1 and fpscr.qc is set.
    vqdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
                   (sizeof(Element) * 8);
        if (srcElem1 == srcElem2 &&
                srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            destElem = ~srcElem1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2652
2653    vqrdmulhCode = '''
2654        FPSCR fpscr = (FPSCR) FpscrQc;
2655        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2656                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2657                   (sizeof(Element) * 8);
2658        Element maxNeg = std::numeric_limits<Element>::min();
2659        Element halfNeg = maxNeg / 2;
2660        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2661            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2662            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2663            if (destElem < 0) {
2664                destElem = mask(sizeof(Element) * 8 - 1);
2665            } else {
2666                destElem = std::numeric_limits<Element>::min();
2667            }
2668            fpscr.qc = 1;
2669        }
2670        FpscrQc = fpscr;
2671    '''
2672    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2673            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2674    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2675            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2676
2677    vMinMaxFpCode = '''
2678        destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
2679    '''
2680    vMinMaxInsts = [
2681        ("vmax",   "VmaxDFp",   2, "Max",    False, ),
2682        ("vmax",   "VmaxQFp",   4, "Max",    False, ),
2683        ("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ),
2684        ("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ),
2685        ("vpmax",  "VpmaxDFp",  2, "Max",    True,  ),
2686        ("vpmax",  "VpmaxQFp",  4, "Max",    True,  ),
2687        ("vmin",   "VminDFp",   2, "Min",    False, ),
2688        ("vmin",   "VminQFp",   4, "Min",    False, ),
2689        ("vminnm", "VminnmDFp", 2, "MinNum", False, ),
2690        ("vminnm", "VminnmQFp", 4, "MinNum", False, ),
2691        ("vpmin",  "VpminDFp",  2, "Min",    True,  ),
2692        ("vpmin",  "VpminQFp",  4, "Min",    True,  ),
2693    ]
2694    for name, Name, rCount, op, pairwise in vMinMaxInsts:
2695        threeEqualRegInst(
2696            name,
2697            Name,
2698            "SimdFloatCmpOp",
2699            ("uint32_t",),
2700            rCount,
2701            vMinMaxFpCode % op,
2702            pairwise=pairwise,
2703            standardFpcsr=True,
2704        )
2705
    # Single-precision add: destReg = binaryOp(srcReg1, srcReg2) with fpAddS
    # and VfpRoundNearest. The FPSCR value is read from FpscrExc, passed to
    # binaryOp, and written back afterwards. The meaning of the two `true`
    # arguments is defined by binaryOp, outside this chunk.
    vaddfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)

    # vpadd reuses the same add snippet with pairwise=True.
    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
                        2, vaddfpCode, pairwise=True)
    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
                        4, vaddfpCode, pairwise=True)
2719
    # Single-precision subtract: destReg = binaryOp(srcReg1, srcReg2) with
    # fpSubS and VfpRoundNearest; FPSCR is read from and written back to
    # FpscrExc.
    vsubfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2728
    # Single-precision multiply: destReg = binaryOp(srcReg1, srcReg2) with
    # fpMulS and VfpRoundNearest; FPSCR is read from and written back to
    # FpscrExc.
    vmulfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2737
    # Single-precision multiply-accumulate done as two separate binaryOp
    # calls: mid = srcReg1 * srcReg2 (fpMulS), then destReg = mid + destReg
    # (fpAddS). Both calls share the same fpscr, which is written back to
    # FpscrExc at the end.
    vmlafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2748
    # vfma: a single ternaryOp call using fpMulAdd<float> on
    # (srcReg1, srcReg2, destReg), in contrast to the two-step vmla snippet.
    # FPSCR is read from and written back to FpscrExc.
    vfmafpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2757
    # vfms: same ternaryOp / fpMulAdd<float> call as vfma, but srcReg1 is
    # negated before it is passed in. FPSCR is read from and written back to
    # FpscrExc.
    vfmsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
                            true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2766
    # Single-precision multiply-subtract done as two separate binaryOp calls:
    # mid = srcReg1 * srcReg2 (fpMulS), then destReg = destReg - mid (fpSubS).
    # Both calls share the same fpscr, which is written back to FpscrExc.
    vmlsfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
                             true, true, VfpRoundNearest);
        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2777
    # vcgt (float): runs vcgtFunc through binaryOp and maps its float result:
    # res == 0 gives destReg = -1 (all ones), anything else gives 0; res == 2.0
    # additionally sets fpscr.ioc. Presumably vcgtFunc returns 0 when the
    # comparison holds and 2.0 for an invalid-operation case -- confirm
    # against its definition. Registered with toInt = True.
    vcgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
            2, vcgtfpCode, toInt = True)
    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
            4, vcgtfpCode, toInt = True)
2791
    # vcge (float): runs vcgeFunc through binaryOp and maps its float result:
    # res == 0 gives destReg = -1 (all ones), anything else gives 0; res == 2.0
    # additionally sets fpscr.ioc. Registered with toInt = True.
    vcgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
            2, vcgefpCode, toInt = True)
    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
            4, vcgefpCode, toInt = True)
2805
    # vacgt (float): runs vacgtFunc through binaryOp and maps its float
    # result: res == 0 gives destReg = -1 (all ones), anything else gives 0;
    # res == 2.0 additionally sets fpscr.ioc. Registered with toInt = True.
    vacgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
            2, vacgtfpCode, toInt = True)
    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
            4, vacgtfpCode, toInt = True)
2819
    # vacge (float): runs vacgeFunc through binaryOp and maps its float
    # result: res == 0 gives destReg = -1 (all ones), anything else gives 0;
    # res == 2.0 additionally sets fpscr.ioc. Registered with toInt = True.
    vacgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
            2, vacgefpCode, toInt = True)
    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
            4, vacgefpCode, toInt = True)
2833
    # vceq (float): runs vceqFunc through binaryOp and maps its float result:
    # res == 0 gives destReg = -1 (all ones), anything else gives 0; res == 2.0
    # additionally sets fpscr.ioc. Registered with toInt = True.
    vceqfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
            2, vceqfpCode, toInt = True)
    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
            4, vceqfpCode, toInt = True)
2847
    # vrecps: destReg = binaryOp(srcReg1, srcReg2) with fpRecpsS and
    # VfpRoundNearest; FPSCR is read from and written back to FpscrExc.
    vrecpsCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2856
    # vrsqrts: destReg = binaryOp(srcReg1, srcReg2) with fpRSqrtsS and
    # VfpRoundNearest; FPSCR is read from and written back to FpscrExc.
    vrsqrtsCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
                           true, true, VfpRoundNearest);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2865
    # vabd (float): mid = srcReg1 - srcReg2 via binaryOp/fpSubS, then
    # destReg = fabs(mid). FPSCR is read from and written back to FpscrExc.
    vabdfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
                             true, true, VfpRoundNearest);
        destReg = fabs(mid);
        FpscrExc = fpscr;
    '''
    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2875
    # Two-register forms of vmla/vmlal, reusing the vmlaCode, vmlafpCode and
    # vmlalCode snippets. Presumably these are the by-scalar encodings (class
    # names carry an extra "s") -- confirm against the twoEqualRegInst helper.
    # NOTE(review): the integer forms use unsignedTypes here while the vmls
    # two-register forms use allTypes -- confirm this is intentional.
    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2881
    # Two-register forms of vmls/vmlsl, reusing the vmlsCode, vmlsfpCode and
    # vmlslCode snippets.
    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2887
    # Two-register forms of vmul/vmull, reusing the vmulCode, vmulfpCode and
    # vmullCode snippets.
    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2893
    # Two-register forms of the saturating doubling multiplies, reusing the
    # vqdmullCode, vqdmlalCode, vqdmlslCode, vqdmulhCode and vqrdmulhCode
    # snippets.
    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2903
    # vshr (immediate): right shift by imm. When imm is at least the element
    # width the result is -1 for negative inputs (per ltz) and 0 otherwise,
    # which avoids shifting by the full width.
    vshrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            if (ltz(srcElem1))
                destElem = -1;
            else
                destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
    '''
    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2916
2917    vsraCode = '''
2918        Element mid;;
2919        if (imm >= sizeof(srcElem1) * 8) {
2920            mid = ltz(srcElem1) ? -1 : 0;
2921        } else {
2922            mid = srcElem1 >> imm;
2923            if (ltz(srcElem1) && !ltz(mid)) {
2924                mid |= -(mid & ((Element)1 <<
2925                            (sizeof(Element) * 8 - 1 - imm)));
2926            }
2927        }
2928        destElem += mid;
2929    '''
2930    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2931    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2932
    # vrshr (immediate): rounding right shift.
    #   * imm > element width: result is 0.
    #   * imm != 0: rBit is bit (imm - 1) of srcElem1, the last bit shifted
    #     out. The shift is done in two steps, (imm - 1) then 1, so that
    #     imm == element width never shifts by the full width in one go.
    #   * imm == 0: copy srcElem1.
    vrshrCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
    '''
    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2945
    # vrsra (immediate): rounding right shift, accumulated into destElem.
    # Same three cases as the non-accumulating rounding shift: imm above the
    # element width adds 0 (destElem unchanged), a non-zero imm adds the
    # two-step shifted value plus the rounding bit, and imm == 0 adds
    # srcElem1 itself.
    vrsraCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem += 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem += srcElem1;
        }
    '''
    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2958
    # vsri (immediate): shift right and insert. srcElem1 >> imm supplies the
    # low (width - imm) bits; the top imm bits of destElem are kept via
    # ~mask(width - imm). When imm is at least the element width destElem is
    # left unchanged (the self-assignment is a no-op).
    vsriCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 >> imm) |
                (destElem & ~mask(sizeof(Element) * 8 - imm));
        }
    '''
    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2969
    # vshl (immediate). This rebinds vshlCode, replacing the register-shift
    # snippet that was passed to the earlier "vshl" registrations.
    # When imm is at least the element width the shift is split into
    # (width - 1) followed by 1, so no single shift uses the full width.
    vshlCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
        } else {
            destElem = srcElem1 << imm;
        }
    '''
    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2979
    # vsli (immediate): shift left and insert. srcElem1 << imm supplies the
    # upper bits; the low imm bits of destElem are kept via mask(imm). When
    # imm is at least the element width destElem is left unchanged (the
    # self-assignment is a no-op).
    vsliCode = '''
        if (imm >= sizeof(Element) * 8) {
            destElem = destElem;
        } else {
            destElem = (srcElem1 << imm) | (destElem & mask(imm));
        }
    '''
    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2989
    # vqshl (immediate), signed element types.
    #   * imm >= element width: any non-zero input saturates (minimum Element
    #     value, bit-inverted to the maximum for positive inputs) and sets
    #     fpscr.qc; a zero input gives 0.
    #   * imm != 0: destElem = srcElem1 << imm, then the top (imm + 1) bits of
    #     srcElem1 are checked; unless they are all zeros or all ones the
    #     result saturates the same way and fpscr.qc is set.
    #   * imm == 0: copy srcElem1.
    # The updated fpscr is written back to FpscrQc.
    vqshlCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - 1 - imm);
            if (topBits != 0 && topBits != mask(imm + 1)) {
                destElem = std::numeric_limits<Element>::min();
                if (srcElem1 > 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
3019
    # Saturating left shift by immediate, unsigned element types.
    #   * imm >= element width: any non-zero input saturates to all ones and
    #     sets fpscr.qc; a zero input gives 0.
    #   * imm != 0: destElem = srcElem1 << imm; if any of the top imm bits of
    #     srcElem1 is set, the result saturates to all ones and fpscr.qc is
    #     set.
    #   * imm == 0: copy srcElem1.
    # The updated fpscr is written back to FpscrQc.
    vqshluCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3045
    # Saturating left shift by immediate, signed input saturated to an
    # unsigned range. In every branch a negative srcElem1 gives 0 and sets
    # fpscr.qc, including imm == 0.
    #   * imm >= element width: a positive input saturates to all ones and
    #     sets fpscr.qc; zero gives 0.
    #   * imm != 0: destElem = srcElem1 << imm; for a non-negative input with
    #     any of the top imm bits set, the result saturates to all ones and
    #     fpscr.qc is set.
    #   * imm == 0: copy a non-negative srcElem1.
    # The updated fpscr is written back to FpscrQc.
    vqshlusCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm >= sizeof(Element) * 8) {
            if (srcElem1 < 0) {
                destElem = 0;
                fpscr.qc = 1;
            } else if (srcElem1 > 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = 0;
            }
        } else if (imm) {
            destElem = (srcElem1 << imm);
            uint64_t topBits = bits((uint64_t)srcElem1,
                                    sizeof(Element) * 8 - 1,
                                    sizeof(Element) * 8 - imm);
            if (srcElem1 < 0) {
                destElem = 0;
                fpscr.qc = 1;
            } else if (topBits != 0) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            }
        } else {
            if (srcElem1 < 0) {
                fpscr.qc = 1;
                destElem = 0;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3082
    # C++ body: plain shift right by immediate, registered through the
    # narrowing-shift helper. A shift of at least the source element width
    # yields 0; otherwise the source is shifted right by imm.
    vshrnCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
            destElem = 0;
        } else {
            destElem = srcElem1 >> imm;
        }
    '''
    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3091
    # C++ body: rounding shift right by immediate, registered through the
    # narrowing-shift helper.
    #  - imm > source width: result is 0.
    #  - imm != 0: bit (imm - 1) of the source is added to the shifted value
    #    as the rounding increment. The shift is done as (imm - 1) followed
    #    by 1, presumably so that imm equal to the full source width never
    #    becomes a single full-width shift.
    #  - imm == 0: the source is copied.
    vrshrnCode = '''
        if (imm > sizeof(srcElem1) * 8) {
            destElem = 0;
        } else if (imm) {
            Element rBit = bits(srcElem1, imm - 1);
            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
        } else {
            destElem = srcElem1;
        }
    '''
    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3103
    # C++ body: signed saturating shift right by immediate with narrowing.
    #  - imm > source width: result is 0; fpscr.qc is set unless the source
    #    is 0 or -1.
    #  - imm != 0: the source is shifted (as (imm - 1) then 1) into a
    #    BigElement, then sign-extended from bit (BigElement width - 1 - imm)
    #    by OR-ing in the negated bit. If the value does not survive a round
    #    trip through Element, the result saturates to mask(width - 1) (the
    #    largest positive Element), bit-inverted for a negative source, and
    #    fpscr.qc is set.
    #  - imm == 0: the source is assigned without a saturation check.
    vqshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0 && srcElem1 != -1)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3128
    # C++ body: unsigned saturating shift right by immediate with narrowing.
    #  - imm > source width: result is 0; fpscr.qc is set for a non-zero
    #    source.
    #  - imm != 0: the shifted value is kept if it survives a round trip
    #    through Element; otherwise the result is all ones and fpscr.qc is
    #    set.
    #  - imm == 0: the source is assigned without a saturation check.
    vqshrunCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8);
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3150
    # C++ body: shift right by immediate with narrowing, registered for the
    # signed types but saturating to the range [0, all ones]. It shares the
    # "vqshrun" mnemonic with NVqshrun and uses the class name NVqshruns.
    #  - imm > source width: result is 0; fpscr.qc is set for a non-zero
    #    source.
    #  - imm != 0: if any bit of the shifted value at or above the Element
    #    width is set, the result is 0 for a negative source and all ones
    #    otherwise, and fpscr.qc is set.
    #  - imm == 0: the source is assigned without a saturation check.
    vqshrunsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
            if (bits(mid, sizeof(BigElement) * 8 - 1,
                          sizeof(Element) * 8) != 0) {
                if (srcElem1 < 0) {
                    destElem = 0;
                } else {
                    destElem = mask(sizeof(Element) * 8);
                }
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3177
    # C++ body: signed saturating rounding shift right by immediate with
    # narrowing.
    #  - imm > source width: result is 0; fpscr.qc is set unless the source
    #    is 0 or -1.
    #  - imm != 0: the source is shifted by (imm - 1), its low bit is saved
    #    as the rounding bit, the value is shifted once more, sign-extended
    #    from bit (BigElement width - 1 - imm), and the rounding bit is
    #    added. A value that does not survive a round trip through Element
    #    saturates to mask(width - 1), bit-inverted for a negative source,
    #    with fpscr.qc set.
    #  - imm == 0: the same round-trip saturation check is applied directly
    #    to the source.
    vqrshrnCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0 && srcElem1 != -1)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid |= -(mid & ((BigElement)1 <<
                        (sizeof(BigElement) * 8 - 1 - imm)));
            mid += rBit;
            if (mid != (Element)mid) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 != (Element)srcElem1) {
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
                fpscr.qc = 1;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3213
3214    vqrshrunCode = '''
3215        FPSCR fpscr = (FPSCR) FpscrQc;
3216        if (imm > sizeof(srcElem1) * 8) {
3217            if (srcElem1 != 0)
3218                fpscr.qc = 1;
3219            destElem = 0;
3220        } else if (imm) {
3221            BigElement mid = (srcElem1 >> (imm - 1));
3222            uint64_t rBit = mid & 0x1;
3223            mid >>= 1;
3224            mid += rBit;
3225            if (mid != (Element)mid) {
3226                destElem = mask(sizeof(Element) * 8);
3227                fpscr.qc = 1;
3228            } else {
3229                destElem = mid;
3230            }
3231        } else {
3232            if (srcElem1 != (Element)srcElem1) {
3233                destElem = mask(sizeof(Element) * 8 - 1);
3234                fpscr.qc = 1;
3235            } else {
3236                destElem = srcElem1;
3237            }
3238        }
3239        FpscrQc = fpscr;
3240    '''
3241    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3242                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3243
    # C++ body: rounding shift right by immediate with narrowing, registered
    # for the signed types but saturating to the range [0, all ones]. It
    # shares the "vqrshrun" mnemonic with NVqrshrun and uses the class name
    # NVqrshruns.
    #  - imm > source width: result is 0; fpscr.qc is set for a non-zero
    #    source.
    #  - imm != 0: shift by (imm - 1), save the low bit as the rounding bit,
    #    shift once more, sign-extend from bit (BigElement width - 1 - imm)
    #    and add the rounding bit. If any bit at or above the Element width
    #    is set, the result is 0 for a negative source and all ones
    #    otherwise, with fpscr.qc set.
    #  - imm == 0: a negative source gives 0 with fpscr.qc set; any other
    #    source is assigned as is.
    vqrshrunsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (imm > sizeof(srcElem1) * 8) {
            if (srcElem1 != 0)
                fpscr.qc = 1;
            destElem = 0;
        } else if (imm) {
            BigElement mid = (srcElem1 >> (imm - 1));
            uint64_t rBit = mid & 0x1;
            mid >>= 1;
            mid |= -(mid & ((BigElement)1 <<
                            (sizeof(BigElement) * 8 - 1 - imm)));
            mid += rBit;
            if (bits(mid, sizeof(BigElement) * 8 - 1,
                          sizeof(Element) * 8) != 0) {
                if (srcElem1 < 0) {
                    destElem = 0;
                } else {
                    destElem = mask(sizeof(Element) * 8);
                }
                fpscr.qc = 1;
            } else {
                destElem = mid;
            }
        } else {
            if (srcElem1 < 0) {
                fpscr.qc = 1;
                destElem = 0;
            } else {
                destElem = srcElem1;
            }
        }
        FpscrQc = fpscr;
    '''
    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3280
    # C++ body: left shift by immediate registered through the long-shift
    # helper. The source is cast to BigElement before shifting so the shifted
    # bits are kept; a shift of at least the destination width yields 0.
    vshllCode = '''
        if (imm >= sizeof(destElem) * 8) {
            destElem = 0;
        } else {
            destElem = (BigElement)srcElem1 << imm;
        }
    '''
    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3289
    # C++ body: plain element copy. It is registered through the same
    # long-shift helper as vshll, but with the SimdMiscOp op class.
    vmovlCode = '''
        destElem = srcElem1;
    '''
    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3294
    # C++ body: float to fixed-point conversion via
    # vfpFpToFixed<float>(src, false, 32, imm); the "false" argument is
    # presumably the signedness flag (the sibling vcvt2sfxCode passes true).
    # fpscr.idc is set when flushToZero() reports true for the source. The
    # conversion runs between prepFpState(VfpRoundNearest) and finishVfp(),
    # and the result is published through FpscrExc.
    # NOTE(review): the empty asm statements with "m" operands presumably
    # keep the compiler from moving the conversion across the FP-state
    # save/restore - confirm.
    vcvt2ufxCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcElem1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
        __asm__ __volatile__("" :: "m" (destReg));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    # D and Q classes; toInt = True is passed to the helper.
    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
            2, vcvt2ufxCode, toInt = True)
    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
            4, vcvt2ufxCode, toInt = True)
3310
    # C++ body: float to fixed-point conversion via
    # vfpFpToFixed<float>(src, true, 32, imm); the "true" argument is
    # presumably the signedness flag (the sibling vcvt2ufxCode passes false).
    # fpscr.idc is set when flushToZero() reports true for the source. The
    # conversion runs between prepFpState(VfpRoundNearest) and finishVfp(),
    # and the result is published through FpscrExc.
    vcvt2sfxCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcElem1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
        __asm__ __volatile__("" :: "m" (destReg));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    # D and Q classes; toInt = True is passed to the helper.
    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
            2, vcvt2sfxCode, toInt = True)
    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
            4, vcvt2sfxCode, toInt = True)
3326
    # C++ body: fixed-point to float conversion through
    # vfpUFixedToFpS(true, true, src, 32, imm) (unsigned variant, judging by
    # the helper name). The call is bracketed by prepFpState(VfpRoundNearest)
    # and finishVfp(), and the updated FPSCR copy is written to FpscrExc.
    vcvtu2fpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    # D and Q classes; fromInt = True is passed to the helper.
    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
            2, vcvtu2fpCode, fromInt = True)
    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
            4, vcvtu2fpCode, fromInt = True)
3340
    # C++ body: fixed-point to float conversion through
    # vfpSFixedToFpS(true, true, src, 32, imm) (signed variant, judging by
    # the helper name). The call is bracketed by prepFpState(VfpRoundNearest)
    # and finishVfp(), and the updated FPSCR copy is written to FpscrExc.
    vcvts2fpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    # D and Q classes; fromInt = True is passed to the helper.
    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
            2, vcvts2fpCode, fromInt = True)
    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
            4, vcvts2fpCode, fromInt = True)
3354
    # C++ body: narrowing conversion registered for uint16_t destination
    # elements. The source bits are reinterpreted as a float with bitsToFp(),
    # fpscr.idc is set if flushToZero() reports true for it, and the value is
    # converted with vcvtFpSFpH() using VfpRoundNearest and fpscr.ahp.
    # destElem is zeroed first and also listed as an operand of the first
    # empty asm statement.
    vcvts2hCode = '''
        destElem = 0;
        FPSCR fpscr = (FPSCR) FpscrExc;
        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
        if (flushToZero(srcFp1))
            fpscr.idc = 1;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
                                : "m" (srcFp1), "m" (destElem));
        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
                              fpscr.ahp, srcFp1);
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3371
    # C++ body: widening conversion registered for uint16_t source elements.
    # The source is converted with vcvtFpHFpS() (honouring fpscr.ahp) and the
    # resulting float is stored as its bit pattern via fpToBits().
    vcvth2sCode = '''
        destElem = 0;
        FPSCR fpscr = (FPSCR) FpscrExc;
        VfpSavedState state = prepFpState(VfpRoundNearest);
        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
                                : "m" (srcElem1), "m" (destElem));
        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
        __asm__ __volatile__("" :: "m" (destElem));
        finishVfp(fpscr, state, true);
        FpscrExc = fpscr;
    '''
    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3384
    # C++ body: integer form of vrsqrte; each uint32_t element is passed
    # through unsignedRSqrtEstimate().
    vrsqrteCode = '''
        destElem = unsignedRSqrtEstimate(srcElem1);
    '''
    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3390
    # C++ body: floating-point form of vrsqrte. fpscr.idc is set when
    # flushToZero() reports true for the source, then fprSqrtEstimate()
    # produces the result and the FPSCR copy is written to FpscrExc.
    vrsqrtefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcReg1))
            fpscr.idc = 1;
        destReg = fprSqrtEstimate(fpscr, srcReg1);
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3400
    # C++ body: integer form of vrecpe; each uint32_t element is passed
    # through unsignedRecipEstimate().
    vrecpeCode = '''
        destElem = unsignedRecipEstimate(srcElem1);
    '''
    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3406
    # C++ body: floating-point form of vrecpe. fpscr.idc is set when
    # flushToZero() reports true for the source, then fpRecipEstimate()
    # produces the result and the FPSCR copy is written to FpscrExc.
    vrecpefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        if (flushToZero(srcReg1))
            fpscr.idc = 1;
        destReg = fpRecipEstimate(fpscr, srcReg1);
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3416
    # C++ body: reverse elements within 2-byte groups. groupSize is the
    # number of elements per (1 << 1)-byte group; XOR-ing the index i with
    # (groupSize - 1) gives the mirrored index j inside the group.
    # NOTE(review): i and j are not declared here; presumably they are the
    # source and destination indices of the element loop generated by
    # twoRegMiscInst - confirm.
    vrev16Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 1) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
    # C++ body: reverse elements within 4-byte groups. groupSize is the
    # number of elements per (1 << 2)-byte group; XOR-ing the index i with
    # (groupSize - 1) gives the mirrored index j inside the group.
    # Registered for 8- and 16-bit elements.
    vrev32Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 2) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev32", "NVrev32D",
            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
    twoRegMiscInst("vrev32", "NVrev32Q",
            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
    # C++ body: reverse elements within 8-byte groups. groupSize is the
    # number of elements per (1 << 3)-byte group; XOR-ing the index i with
    # (groupSize - 1) gives the mirrored index j inside the group.
    vrev64Code = '''
        destElem = srcElem1;
        unsigned groupSize = ((1 << 3) / sizeof(Element));
        unsigned reverseMask = (groupSize - 1);
        j = i ^ reverseMask;
    '''
    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3443
    # NOTE(review): split(), vcompares and vcomparesL are defined outside
    # this part of the file. Presumably split('exec') starts a new section of
    # the generated exec output - confirm. The two strings are then appended
    # to exec_output.
    split('exec')
    exec_output += vcompares + vcomparesL
3446
    # C++ body: sum of two source elements, each cast to BigElement before
    # the addition so the sum is computed at the wider width.
    vpaddlCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3452
    # C++ body: like vpaddlCode, but the widened pair sum is accumulated into
    # the existing destination element. The trailing True argument presumably
    # tells the helper to read the destination first - confirm against
    # twoRegCondenseInst.
    vpadalCode = '''
        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3458
    # C++ body: counts how many bits directly below the top bit have the
    # same value as the top bit. The source is shifted left once to drop the
    # top bit; then, while the new top bit still matches the original sign
    # (tested with < 0 or >= 0), the count is incremented and the value
    # shifted again. The count is capped at (element width - 1).
    vclsCode = '''
        unsigned count = 0;
        if (srcElem1 < 0) {
            srcElem1 <<= 1;
            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        } else {
            srcElem1 <<= 1;
            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
                count++;
                srcElem1 <<= 1;
            }
        }
        destElem = count;
    '''
    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3478
    # C++ body: counts leading zero bits. The snippet is registered with the
    # signed types, so "srcElem1 >= 0" tests whether the top bit is clear;
    # the value is shifted left until the top bit is set or the count reaches
    # the element width (which is the result for a zero source).
    vclzCode = '''
        unsigned count = 0;
        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
            count++;
            srcElem1 <<= 1;
        }
        destElem = count;
    '''
    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3489
    # C++ body: counts the set bits of the source by adding the low bit and
    # shifting right until the value is zero (or the element width has been
    # consumed).
    vcntCode = '''
        unsigned count = 0;
        while (srcElem1 && count < sizeof(Element) * 8) {
            count += srcElem1 & 0x1;
            srcElem1 >>= 1;
        }
        destElem = count;
    '''

    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3501
    # C++ body: bitwise NOT of the source (register form), registered for
    # uint64_t elements only. The name vmvnCode is rebound further down for
    # the immediate form of vmvn.
    vmvnCode = '''
        destElem = ~srcElem1;
    '''
    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3507
    # C++ body: saturating absolute value. The most negative Element value
    # has no positive counterpart, so it sets fpscr.qc and produces its
    # bitwise complement; other negative values are negated and non-negative
    # values are copied.
    vqabsCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            fpscr.qc = 1;
            destElem = ~srcElem1;
        } else if (srcElem1 < 0) {
            destElem = -srcElem1;
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3522
    # C++ body: saturating negation. The most negative Element value sets
    # fpscr.qc and produces its bitwise complement; every other value is
    # simply negated.
    vqnegCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (srcElem1 == (Element)(std::numeric_limits<Element>::min())) {
            fpscr.qc = 1;
            destElem = ~srcElem1;
        } else {
            destElem = -srcElem1;
        }
        FpscrQc = fpscr;
    '''
    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3535
    # C++ body: non-saturating integer absolute value (negative sources are
    # negated, others copied); no FPSCR state is touched.
    vabsCode = '''
        if (srcElem1 < 0) {
            destElem = -srcElem1;
        } else {
            destElem = srcElem1;
        }
    '''

    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
    # C++ body: floating-point absolute value done on the bit pattern. The
    # float is viewed as a uint32_t through a union and AND-ed with
    # mask(sizeof(Element) * 8 - 1), which clears the top bit when Element is
    # the 4-byte float this snippet is registered with.
    vabsfpCode = '''
        union
        {
            uint32_t i;
            float f;
        } cStruct;
        cStruct.f = srcReg1;
        cStruct.i &= mask(sizeof(Element) * 8 - 1);
        destReg = cStruct.f;
    '''
    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3558
    # C++ body: non-saturating integer negation of each signed element.
    vnegCode = '''
        destElem = -srcElem1;
    '''
    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
    # C++ body: floating-point negation using the C++ unary minus; no FPSCR
    # state is read or written.
    vnegfpCode = '''
        destReg = -srcReg1;
    '''
    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3569
    # Integer compare against zero: all ones when the element is > 0, else 0.
    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
    # Floating-point compare against 0.0 through binaryOp() with vcgtFunc.
    # A result of 0 produces -1 (all ones) in the destination and any other
    # result produces 0; a result of 2.0 additionally sets fpscr.ioc
    # (presumably the unordered / NaN case - confirm against vcgtFunc).
    vcgtfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgtFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
            2, vcgtfpCode, toInt = True)
    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
            4, vcgtfpCode, toInt = True)
3586
    # Integer compare against zero: all ones when the element is >= 0,
    # else 0.
    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
    # Floating-point compare against 0.0 through binaryOp() with vcgeFunc.
    # A result of 0 produces -1 (all ones) in the destination and any other
    # result produces 0; a result of 2.0 additionally sets fpscr.ioc
    # (presumably the unordered / NaN case - confirm against vcgeFunc).
    vcgefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcgeFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
            2, vcgefpCode, toInt = True)
    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
            4, vcgefpCode, toInt = True)
3603
    # Integer compare against zero: all ones when the element is == 0,
    # else 0.
    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
    # Floating-point compare against 0.0 through binaryOp() with vceqFunc.
    # A result of 0 produces -1 (all ones) in the destination and any other
    # result produces 0; a result of 2.0 additionally sets fpscr.ioc
    # (presumably the unordered / NaN case - confirm against vceqFunc).
    vceqfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vceqFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
            2, vceqfpCode, toInt = True)
    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
            4, vceqfpCode, toInt = True)
3620
    # Integer compare against zero: all ones when the element is <= 0,
    # else 0.
    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
    # Floating-point compare against 0.0 through binaryOp() with vcleFunc.
    # A result of 0 produces -1 (all ones) in the destination and any other
    # result produces 0; a result of 2.0 additionally sets fpscr.ioc
    # (presumably the unordered / NaN case - confirm against vcleFunc).
    vclefpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcleFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
            2, vclefpCode, toInt = True)
    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
            4, vclefpCode, toInt = True)
3637
    # Integer compare against zero: all ones when the element is < 0, else 0.
    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
    # Floating-point compare against 0.0 through binaryOp() with vcltFunc.
    # A result of 0 produces -1 (all ones) in the destination and any other
    # result produces 0; a result of 2.0 additionally sets fpscr.ioc
    # (presumably the unordered / NaN case - confirm against vcltFunc).
    vcltfpCode = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        float res = binaryOp(fpscr, srcReg1, (float)0.0, vcltFunc,
                             true, true, VfpRoundNearest);
        destReg = (res == 0) ? -1 : 0;
        if (res == 2.0)
            fpscr.ioc = 1;
        FpscrExc = fpscr;
    '''
    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
            2, vcltfpCode, toInt = True)
    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
            4, vcltfpCode, toInt = True)
3654
    # C++ body: swaps the contents of srcReg1 and destReg one 32-bit register
    # word at a time, for rCount words. Both operands are modified.
    vswpCode = '''
        uint32_t mid;
        for (unsigned r = 0; r < rCount; r++) {
            mid = srcReg1.regs[r];
            srcReg1.regs[r] = destReg.regs[r];
            destReg.regs[r] = mid;
        }
    '''
    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3665
    # C++ body: for every even index i, exchanges srcReg1.elements[i] with
    # destReg.elements[i + 1]. Both operands are modified; the other elements
    # are left untouched.
    vtrnCode = '''
        Element mid;
        for (unsigned i = 0; i < eCount; i += 2) {
            mid = srcReg1.elements[i];
            srcReg1.elements[i] = destReg.elements[i + 1];
            destReg.elements[i + 1] = mid;
        }
    '''
    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
            smallUnsignedTypes, 2, vtrnCode)
    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
            smallUnsignedTypes, 4, vtrnCode)
3678
    # C++ body: de-interleaves the two operands.
    #  - destReg ends up with the even-indexed elements of the original
    #    destReg in its lower half and the even-indexed elements of the
    #    original srcReg1 in its upper half.
    #  - srcReg1 ends up with the odd-indexed elements of the original
    #    destReg in its lower half and its own original odd-indexed elements
    #    in its upper half.
    # srcReg1 is copied into "mid" first because it is overwritten while it
    # is still needed; destReg is compacted in place, which is safe because
    # index i is written only after indices 2*i and 2*i + 1 were read.
    vuzpCode = '''
        Element mid[eCount];
        memcpy(&mid, &srcReg1, sizeof(srcReg1));
        for (unsigned i = 0; i < eCount / 2; i++) {
            srcReg1.elements[i] = destReg.elements[2 * i + 1];
            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
            destReg.elements[i] = destReg.elements[2 * i];
        }
        for (unsigned i = 0; i < eCount / 2; i++) {
            destReg.elements[eCount / 2 + i] = mid[2 * i];
        }
    '''
    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3693
3694    vzipCode = '''
3695        Element mid[eCount];
3696        memcpy(&mid, &destReg, sizeof(destReg));
3697        for (unsigned i = 0; i < eCount / 2; i++) {
3698            destReg.elements[2 * i] = mid[i];
3699            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3700        }
3701        for (int i = 0; i < eCount / 2; i++) {
3702            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3703            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3704        }
3705    '''
3706    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3707    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3708
    # Plain element assignment registered through the narrowing helper; no
    # saturation and no FPSCR update (compare the vqmovn* snippets below).
    vmovnCode = 'destElem = srcElem1;'
    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3711
    # Element copy registered through twoRegMiscScInst (presumably the
    # scalar-source form of vdup - confirm against that helper).
    vdupCode = 'destElem = srcElem1;'
    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3715
3716    def vdupGprInst(name, Name, opClass, types, rCount):
3717        global header_output, exec_output
3718        eWalkCode = simdEnabledCheckCode + '''
3719        RegVect destReg;
3720        for (unsigned i = 0; i < eCount; i++) {
3721            destReg.elements[i] = htog((Element)Op1);
3722        }
3723        '''
3724        for reg in range(rCount):
3725            eWalkCode += '''
3726            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3727            ''' % { "reg" : reg }
3728        iop = InstObjParams(name, Name,
3729                            "RegRegOp",
3730                            { "code": eWalkCode,
3731                              "r_count": rCount,
3732                              "predicate_test": predicateTest,
3733                              "op_class": opClass }, [])
3734        header_output += NeonRegRegOpDeclare.subst(iop)
3735        exec_output += NeonEqualRegExecute.subst(iop)
3736        for type in types:
3737            substDict = { "targs" : type,
3738                          "class_name" : Name }
3739            exec_output += NeonExecDeclare.subst(substDict)
3740    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3741    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3742
    # Immediate move: every element is set to imm.
    vmovCode = 'destElem = imm;'
    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3746
    # Immediate OR into the existing destination value. The trailing True
    # presumably tells oneRegImmInst to read the destination first - confirm.
    vorrCode = 'destElem |= imm;'
    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3750
    # Immediate move-NOT: every element is set to the complement of imm.
    # This rebinds vmvnCode, which was used earlier for the register form.
    vmvnCode = 'destElem = ~imm;'
    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3754
    # Immediate bit clear: the bits set in imm are cleared in the existing
    # destination value. The trailing True presumably tells oneRegImmInst to
    # read the destination first - confirm.
    vbicCode = 'destElem &= ~imm;'
    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3758
    # C++ body: signed saturating narrowing move. The source is assigned to
    # the destination; if widening the destination back to BigElement does
    # not reproduce the source, fpscr.qc is set and the result becomes
    # mask(width - 1) (the largest positive Element), bit-inverted for a
    # negative source.
    vqmovnCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8 - 1);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3771
    # C++ body: unsigned saturating narrowing move. If the source does not
    # survive the round trip through the destination type, fpscr.qc is set
    # and the result becomes all ones.
    vqmovunCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if ((BigElement)destElem != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3783
    # C++ body: narrowing move registered for the signed types but
    # saturating to the range [0, all ones]. It shares the "vqmovun" mnemonic
    # with NVqmovun and uses the class name NVqmovuns. A negative source, or
    # one whose value differs from its low Element-width bits, sets fpscr.qc;
    # the result is then all ones, complemented (i.e. zero) for a negative
    # source.
    vqmovunsCode = '''
    FPSCR fpscr = (FPSCR) FpscrQc;
    destElem = srcElem1;
    if (srcElem1 < 0 ||
            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
        fpscr.qc = 1;
        destElem = mask(sizeof(Element) * 8);
        if (srcElem1 < 0)
            destElem = ~destElem;
    }
    FpscrQc = fpscr;
    '''
    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3798
3799    def buildVext(name, Name, opClass, types, rCount, op):
3800        global header_output, exec_output
3801        eWalkCode = simdEnabledCheckCode + '''
3802        RegVect srcReg1, srcReg2, destReg;
3803        '''
3804        for reg in range(rCount):
3805            eWalkCode += '''
3806                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3807                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3808            ''' % { "reg" : reg }
3809        eWalkCode += op
3810        for reg in range(rCount):
3811            eWalkCode += '''
3812            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3813            ''' % { "reg" : reg }
3814        iop = InstObjParams(name, Name,
3815                            "RegRegRegImmOp",
3816                            { "code": eWalkCode,
3817                              "r_count": rCount,
3818                              "predicate_test": predicateTest,
3819                              "op_class": opClass }, [])
3820        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3821        exec_output += NeonEqualRegExecute.subst(iop)
3822        for type in types:
3823            substDict = { "targs" : type,
3824                          "class_name" : Name }
3825            exec_output += NeonExecDeclare.subst(substDict)
3826
3827    vextCode = '''
3828        for (unsigned i = 0; i < eCount; i++) {
3829            unsigned index = i + imm;
3830            if (index < eCount) {
3831                destReg.elements[i] = srcReg1.elements[index];
3832            } else {
3833                index -= eCount;
3834                if (index >= eCount) {
3835                    fault = std::make_shared<UndefinedInstruction>(machInst,
3836                                                                   false,
3837                                                                   mnemonic);
3838                } else {
3839                    destReg.elements[i] = srcReg2.elements[index];
3840                }
3841            }
3842        }
3843    '''
    # vextCode (the C++ snippet above) is the `op` text handed to buildVext.
    # For each destination element i it computes index = i + imm: an index
    # below eCount selects srcReg1.elements[index]; otherwise eCount is
    # subtracted and the element comes from srcReg2, unless the reduced index
    # is still >= eCount, in which case `fault` is set to an
    # UndefinedInstruction.  The loop keeps iterating after a fault is set.
    # eCount, imm, fault, machInst and mnemonic are not defined in the
    # snippet; presumably the enclosing execute template provides them.
    #
    # Two classes are generated from it, both for uint8_t elements: NVextD
    # with rCount = 2 and NVextQ with rCount = 4.
3844    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3845    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3846
3847    def buildVtbxl(name, Name, opClass, length, isVtbl):
3848        global header_output, decoder_output, exec_output
3849        code = simdEnabledCheckCode + '''
3850            union
3851            {
3852                uint8_t bytes[32];
3853                uint32_t regs[8];
3854            } table;
3855
3856            union
3857            {
3858                uint8_t bytes[8];
3859                uint32_t regs[2];
3860            } destReg, srcReg2;
3861
3862            const unsigned length = %(length)d;
3863            const bool isVtbl = %(isVtbl)s;
3864
3865            srcReg2.regs[0] = htog(FpOp2P0_uw);
3866            srcReg2.regs[1] = htog(FpOp2P1_uw);
3867
3868            destReg.regs[0] = htog(FpDestP0_uw);
3869            destReg.regs[1] = htog(FpDestP1_uw);
3870        ''' % { "length" : length, "isVtbl" : isVtbl }
3871        for reg in range(8):
3872            if reg < length * 2:
3873                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3874                        { "reg" : reg }
3875            else:
3876                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3877        code += '''
3878        for (unsigned i = 0; i < sizeof(destReg); i++) {
3879            uint8_t index = srcReg2.bytes[i];
3880            if (index < 8 * length) {
3881                destReg.bytes[i] = table.bytes[index];
3882            } else {
3883                if (isVtbl)
3884                    destReg.bytes[i] = 0;
3885                // else destReg.bytes[i] unchanged
3886            }
3887        }
3888
3889        FpDestP0_uw = gtoh(destReg.regs[0]);
3890        FpDestP1_uw = gtoh(destReg.regs[1]);
3891        '''
3892        iop = InstObjParams(name, Name,
3893                            "RegRegRegOp",
3894                            { "code": code,
3895                              "predicate_test": predicateTest,
3896                              "op_class": opClass }, [])
3897        header_output += RegRegRegOpDeclare.subst(iop)
3898        decoder_output += RegRegRegOpConstructor.subst(iop)
3899        exec_output += PredOpExecute.subst(iop)
3900
3901    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3902    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3903    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3904    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3905
    # The four calls above generate classes NVtbl1..NVtbl4 (mnemonic "vtbl")
    # for length arguments 1 through 4 with isVtbl passed as "true"; the four
    # calls below generate NVtbx1..NVtbx4 (mnemonic "vtbx") for the same
    # lengths with isVtbl passed as "false".  The flag is passed as a string
    # because buildVtbxl substitutes it into the generated C++ as text.
3906    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3907    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3908    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3909    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3910}};
3911