neon.isa revision 10037:5cac77888310
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2011 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder.  You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40output header {{
41    template <template <typename T> class Base>
42    StaticInstPtr
43    decodeNeonUThreeUReg(unsigned size,
44                         ExtMachInst machInst, IntRegIndex dest,
45                         IntRegIndex op1, IntRegIndex op2)
46    {
47        switch (size) {
48          case 0:
49            return new Base<uint8_t>(machInst, dest, op1, op2);
50          case 1:
51            return new Base<uint16_t>(machInst, dest, op1, op2);
52          case 2:
53            return new Base<uint32_t>(machInst, dest, op1, op2);
54          case 3:
55            return new Base<uint64_t>(machInst, dest, op1, op2);
56          default:
57            return new Unknown(machInst);
58        }
59    }
60
61    template <template <typename T> class Base>
62    StaticInstPtr
63    decodeNeonSThreeUReg(unsigned size,
64                         ExtMachInst machInst, IntRegIndex dest,
65                         IntRegIndex op1, IntRegIndex op2)
66    {
67        switch (size) {
68          case 0:
69            return new Base<int8_t>(machInst, dest, op1, op2);
70          case 1:
71            return new Base<int16_t>(machInst, dest, op1, op2);
72          case 2:
73            return new Base<int32_t>(machInst, dest, op1, op2);
74          case 3:
75            return new Base<int64_t>(machInst, dest, op1, op2);
76          default:
77            return new Unknown(machInst);
78        }
79    }
80
81    template <template <typename T> class Base>
82    StaticInstPtr
83    decodeNeonUSThreeUReg(bool notSigned, unsigned size,
84                          ExtMachInst machInst, IntRegIndex dest,
85                          IntRegIndex op1, IntRegIndex op2)
86    {
87        if (notSigned) {
88            return decodeNeonUThreeUReg<Base>(size, machInst, dest, op1, op2);
89        } else {
90            return decodeNeonSThreeUReg<Base>(size, machInst, dest, op1, op2);
91        }
92    }
93
94    template <template <typename T> class Base>
95    StaticInstPtr
96    decodeNeonUThreeUSReg(unsigned size,
97                          ExtMachInst machInst, IntRegIndex dest,
98                          IntRegIndex op1, IntRegIndex op2)
99    {
100        switch (size) {
101          case 0:
102            return new Base<uint8_t>(machInst, dest, op1, op2);
103          case 1:
104            return new Base<uint16_t>(machInst, dest, op1, op2);
105          case 2:
106            return new Base<uint32_t>(machInst, dest, op1, op2);
107          default:
108            return new Unknown(machInst);
109        }
110    }
111
112    template <template <typename T> class Base>
113    StaticInstPtr
114    decodeNeonSThreeUSReg(unsigned size,
115                          ExtMachInst machInst, IntRegIndex dest,
116                          IntRegIndex op1, IntRegIndex op2)
117    {
118        switch (size) {
119          case 0:
120            return new Base<int8_t>(machInst, dest, op1, op2);
121          case 1:
122            return new Base<int16_t>(machInst, dest, op1, op2);
123          case 2:
124            return new Base<int32_t>(machInst, dest, op1, op2);
125          default:
126            return new Unknown(machInst);
127        }
128    }
129
130    template <template <typename T> class Base>
131    StaticInstPtr
132    decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst,
133                             IntRegIndex dest, IntRegIndex op1,
134                             IntRegIndex op2)
135    {
136        switch (size) {
137          case 1:
138            return new Base<int16_t>(machInst, dest, op1, op2);
139          case 2:
140            return new Base<int32_t>(machInst, dest, op1, op2);
141          default:
142            return new Unknown(machInst);
143        }
144    }
145
146    template <template <typename T> class Base>
147    StaticInstPtr
148    decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst,
149                                IntRegIndex dest, IntRegIndex op1,
150                                IntRegIndex op2, uint64_t imm)
151    {
152        switch (size) {
153          case 1:
154            return new Base<int16_t>(machInst, dest, op1, op2, imm);
155          case 2:
156            return new Base<int32_t>(machInst, dest, op1, op2, imm);
157          default:
158            return new Unknown(machInst);
159        }
160    }
161
162    template <template <typename T> class Base>
163    StaticInstPtr
164    decodeNeonUSThreeUSReg(bool notSigned, unsigned size,
165                           ExtMachInst machInst, IntRegIndex dest,
166                           IntRegIndex op1, IntRegIndex op2)
167    {
168        if (notSigned) {
169            return decodeNeonUThreeUSReg<Base>(
170                    size, machInst, dest, op1, op2);
171        } else {
172            return decodeNeonSThreeUSReg<Base>(
173                    size, machInst, dest, op1, op2);
174        }
175    }
176
177    template <template <typename T> class BaseD,
178              template <typename T> class BaseQ>
179    StaticInstPtr
180    decodeNeonUThreeSReg(bool q, unsigned size,
181                         ExtMachInst machInst, IntRegIndex dest,
182                         IntRegIndex op1, IntRegIndex op2)
183    {
184        if (q) {
185            return decodeNeonUThreeUSReg<BaseQ>(
186                    size, machInst, dest, op1, op2);
187        } else {
188            return decodeNeonUThreeUSReg<BaseD>(
189                    size, machInst, dest, op1, op2);
190        }
191    }
192
193    template <template <typename T> class BaseD,
194              template <typename T> class BaseQ>
195    StaticInstPtr
196    decodeNeonSThreeSReg(bool q, unsigned size,
197                         ExtMachInst machInst, IntRegIndex dest,
198                         IntRegIndex op1, IntRegIndex op2)
199    {
200        if (q) {
201            return decodeNeonSThreeUSReg<BaseQ>(
202                    size, machInst, dest, op1, op2);
203        } else {
204            return decodeNeonSThreeUSReg<BaseD>(
205                    size, machInst, dest, op1, op2);
206        }
207    }
208
209    template <template <typename T> class BaseD,
210              template <typename T> class BaseQ>
211    StaticInstPtr
212    decodeNeonSThreeXReg(bool q, unsigned size,
213                         ExtMachInst machInst, IntRegIndex dest,
214                         IntRegIndex op1, IntRegIndex op2)
215    {
216        if (q) {
217            return decodeNeonSThreeUReg<BaseQ>(
218                    size, machInst, dest, op1, op2);
219        } else {
220            return decodeNeonSThreeUSReg<BaseD>(
221                    size, machInst, dest, op1, op2);
222        }
223    }
224
225    template <template <typename T> class BaseD,
226              template <typename T> class BaseQ>
227    StaticInstPtr
228    decodeNeonUThreeXReg(bool q, unsigned size,
229                         ExtMachInst machInst, IntRegIndex dest,
230                         IntRegIndex op1, IntRegIndex op2)
231    {
232        if (q) {
233            return decodeNeonUThreeUReg<BaseQ>(
234                    size, machInst, dest, op1, op2);
235        } else {
236            return decodeNeonUThreeUSReg<BaseD>(
237                    size, machInst, dest, op1, op2);
238        }
239    }
240
241    template <template <typename T> class BaseD,
242              template <typename T> class BaseQ>
243    StaticInstPtr
244    decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size,
245                          ExtMachInst machInst, IntRegIndex dest,
246                          IntRegIndex op1, IntRegIndex op2)
247    {
248        if (notSigned) {
249            return decodeNeonUThreeSReg<BaseD, BaseQ>(
250                    q, size, machInst, dest, op1, op2);
251        } else {
252            return decodeNeonSThreeSReg<BaseD, BaseQ>(
253                    q, size, machInst, dest, op1, op2);
254        }
255    }
256
257    template <template <typename T> class BaseD,
258              template <typename T> class BaseQ>
259    StaticInstPtr
260    decodeNeonUThreeReg(bool q, unsigned size,
261                        ExtMachInst machInst, IntRegIndex dest,
262                        IntRegIndex op1, IntRegIndex op2)
263    {
264        if (q) {
265            return decodeNeonUThreeUReg<BaseQ>(
266                    size, machInst, dest, op1, op2);
267        } else {
268            return decodeNeonUThreeUReg<BaseD>(
269                    size, machInst, dest, op1, op2);
270        }
271    }
272
273    template <template <typename T> class BaseD,
274              template <typename T> class BaseQ>
275    StaticInstPtr
276    decodeNeonSThreeReg(bool q, unsigned size,
277                        ExtMachInst machInst, IntRegIndex dest,
278                        IntRegIndex op1, IntRegIndex op2)
279    {
280        if (q) {
281            return decodeNeonSThreeUReg<BaseQ>(
282                    size, machInst, dest, op1, op2);
283        } else {
284            return decodeNeonSThreeUReg<BaseD>(
285                    size, machInst, dest, op1, op2);
286        }
287    }
288
289    template <template <typename T> class BaseD,
290              template <typename T> class BaseQ>
291    StaticInstPtr
292    decodeNeonUSThreeReg(bool q, bool notSigned, unsigned size,
293                         ExtMachInst machInst, IntRegIndex dest,
294                         IntRegIndex op1, IntRegIndex op2)
295    {
296        if (notSigned) {
297            return decodeNeonUThreeReg<BaseD, BaseQ>(
298                    q, size, machInst, dest, op1, op2);
299        } else {
300            return decodeNeonSThreeReg<BaseD, BaseQ>(
301                    q, size, machInst, dest, op1, op2);
302        }
303    }
304
305    template <template <typename T> class BaseD,
306              template <typename T> class BaseQ>
307    StaticInstPtr
308    decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst,
309                          IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
310    {
311        if (q) {
312            if (size)
313                return new BaseQ<uint64_t>(machInst, dest, op1, op2);
314            else
315                return new BaseQ<uint32_t>(machInst, dest, op1, op2);
316        } else {
317            if (size)
318                return new Unknown(machInst);
319            else
320                return new BaseD<uint32_t>(machInst, dest, op1, op2);
321        }
322    }
323
324    template <template <typename T> class Base>
325    StaticInstPtr
326    decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst,
327                            IntRegIndex dest, IntRegIndex op1, IntRegIndex op2)
328    {
329        if (size)
330            return new Base<uint64_t>(machInst, dest, op1, op2);
331        else
332            return new Base<uint32_t>(machInst, dest, op1, op2);
333    }
334
335    template <template <typename T> class Base>
336    StaticInstPtr
337    decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst,
338                               IntRegIndex dest, IntRegIndex op1,
339                               IntRegIndex op2, uint64_t imm)
340    {
341        if (size)
342            return new Base<uint64_t>(machInst, dest, op1, op2, imm);
343        else
344            return new Base<uint32_t>(machInst, dest, op1, op2, imm);
345    }
346
347    template <template <typename T> class BaseD,
348              template <typename T> class BaseQ>
349    StaticInstPtr
350    decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
351                                IntRegIndex dest, IntRegIndex op1,
352                                IntRegIndex op2, uint64_t imm)
353    {
354        if (q) {
355            switch (size) {
356              case 1:
357                return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm);
358              case 2:
359                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
360              default:
361                return new Unknown(machInst);
362            }
363        } else {
364            switch (size) {
365              case 1:
366                return new BaseD<uint16_t>(machInst, dest, op1, op2, imm);
367              case 2:
368                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
369              default:
370                return new Unknown(machInst);
371            }
372        }
373    }
374
375    template <template <typename T> class BaseD,
376              template <typename T> class BaseQ>
377    StaticInstPtr
378    decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst,
379                                IntRegIndex dest, IntRegIndex op1,
380                                IntRegIndex op2, uint64_t imm)
381    {
382        if (q) {
383            switch (size) {
384              case 1:
385                return new BaseQ<int16_t>(machInst, dest, op1, op2, imm);
386              case 2:
387                return new BaseQ<int32_t>(machInst, dest, op1, op2, imm);
388              default:
389                return new Unknown(machInst);
390            }
391        } else {
392            switch (size) {
393              case 1:
394                return new BaseD<int16_t>(machInst, dest, op1, op2, imm);
395              case 2:
396                return new BaseD<int32_t>(machInst, dest, op1, op2, imm);
397              default:
398                return new Unknown(machInst);
399            }
400        }
401    }
402
403    template <template <typename T> class BaseD,
404              template <typename T> class BaseQ>
405    StaticInstPtr
406    decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst,
407                             IntRegIndex dest, IntRegIndex op1,
408                             IntRegIndex op2, uint64_t imm)
409    {
410        if (q) {
411            if (size)
412                return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm);
413            else
414                return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm);
415        } else {
416            if (size)
417                return new Unknown(machInst);
418            else
419                return new BaseD<uint32_t>(machInst, dest, op1, op2, imm);
420        }
421    }
422
423    template <template <typename T> class BaseD,
424              template <typename T> class BaseQ>
425    StaticInstPtr
426    decodeNeonUTwoShiftReg(bool q, unsigned size,
427                           ExtMachInst machInst, IntRegIndex dest,
428                           IntRegIndex op1, uint64_t imm)
429    {
430        if (q) {
431            switch (size) {
432              case 0:
433                return new BaseQ<uint8_t>(machInst, dest, op1, imm);
434              case 1:
435                return new BaseQ<uint16_t>(machInst, dest, op1, imm);
436              case 2:
437                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
438              case 3:
439                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
440              default:
441                return new Unknown(machInst);
442            }
443        } else {
444            switch (size) {
445              case 0:
446                return new BaseD<uint8_t>(machInst, dest, op1, imm);
447              case 1:
448                return new BaseD<uint16_t>(machInst, dest, op1, imm);
449              case 2:
450                return new BaseD<uint32_t>(machInst, dest, op1, imm);
451              case 3:
452                return new BaseD<uint64_t>(machInst, dest, op1, imm);
453              default:
454                return new Unknown(machInst);
455            }
456        }
457    }
458
459    template <template <typename T> class BaseD,
460              template <typename T> class BaseQ>
461    StaticInstPtr
462    decodeNeonSTwoShiftReg(bool q, unsigned size,
463                           ExtMachInst machInst, IntRegIndex dest,
464                           IntRegIndex op1, uint64_t imm)
465    {
466        if (q) {
467            switch (size) {
468              case 0:
469                return new BaseQ<int8_t>(machInst, dest, op1, imm);
470              case 1:
471                return new BaseQ<int16_t>(machInst, dest, op1, imm);
472              case 2:
473                return new BaseQ<int32_t>(machInst, dest, op1, imm);
474              case 3:
475                return new BaseQ<int64_t>(machInst, dest, op1, imm);
476              default:
477                return new Unknown(machInst);
478            }
479        } else {
480            switch (size) {
481              case 0:
482                return new BaseD<int8_t>(machInst, dest, op1, imm);
483              case 1:
484                return new BaseD<int16_t>(machInst, dest, op1, imm);
485              case 2:
486                return new BaseD<int32_t>(machInst, dest, op1, imm);
487              case 3:
488                return new BaseD<int64_t>(machInst, dest, op1, imm);
489              default:
490                return new Unknown(machInst);
491            }
492        }
493    }
494
495
496    template <template <typename T> class BaseD,
497              template <typename T> class BaseQ>
498    StaticInstPtr
499    decodeNeonUSTwoShiftReg(bool q, bool notSigned, unsigned size,
500                            ExtMachInst machInst, IntRegIndex dest,
501                            IntRegIndex op1, uint64_t imm)
502    {
503        if (notSigned) {
504            return decodeNeonUTwoShiftReg<BaseD, BaseQ>(
505                    q, size, machInst, dest, op1, imm);
506        } else {
507            return decodeNeonSTwoShiftReg<BaseD, BaseQ>(
508                    q, size, machInst, dest, op1, imm);
509        }
510    }
511
512    template <template <typename T> class Base>
513    StaticInstPtr
514    decodeNeonUTwoShiftUSReg(unsigned size,
515                             ExtMachInst machInst, IntRegIndex dest,
516                             IntRegIndex op1, uint64_t imm)
517    {
518        switch (size) {
519          case 0:
520            return new Base<uint8_t>(machInst, dest, op1, imm);
521          case 1:
522            return new Base<uint16_t>(machInst, dest, op1, imm);
523          case 2:
524            return new Base<uint32_t>(machInst, dest, op1, imm);
525          default:
526            return new Unknown(machInst);
527        }
528    }
529
530    template <template <typename T> class Base>
531    StaticInstPtr
532    decodeNeonUTwoShiftUReg(unsigned size,
533                            ExtMachInst machInst, IntRegIndex dest,
534                            IntRegIndex op1, uint64_t imm)
535    {
536        switch (size) {
537          case 0:
538            return new Base<uint8_t>(machInst, dest, op1, imm);
539          case 1:
540            return new Base<uint16_t>(machInst, dest, op1, imm);
541          case 2:
542            return new Base<uint32_t>(machInst, dest, op1, imm);
543          case 3:
544            return new Base<uint64_t>(machInst, dest, op1, imm);
545          default:
546            return new Unknown(machInst);
547        }
548    }
549
550    template <template <typename T> class Base>
551    StaticInstPtr
552    decodeNeonSTwoShiftUReg(unsigned size,
553                            ExtMachInst machInst, IntRegIndex dest,
554                            IntRegIndex op1, uint64_t imm)
555    {
556        switch (size) {
557          case 0:
558            return new Base<int8_t>(machInst, dest, op1, imm);
559          case 1:
560            return new Base<int16_t>(machInst, dest, op1, imm);
561          case 2:
562            return new Base<int32_t>(machInst, dest, op1, imm);
563          case 3:
564            return new Base<int64_t>(machInst, dest, op1, imm);
565          default:
566            return new Unknown(machInst);
567        }
568    }
569
570    template <template <typename T> class BaseD,
571              template <typename T> class BaseQ>
572    StaticInstPtr
573    decodeNeonUTwoShiftSReg(bool q, unsigned size,
574                            ExtMachInst machInst, IntRegIndex dest,
575                            IntRegIndex op1, uint64_t imm)
576    {
577        if (q) {
578            return decodeNeonUTwoShiftUSReg<BaseQ>(
579                    size, machInst, dest, op1, imm);
580        } else {
581            return decodeNeonUTwoShiftUSReg<BaseD>(
582                    size, machInst, dest, op1, imm);
583        }
584    }
585
586    template <template <typename T> class Base>
587    StaticInstPtr
588    decodeNeonSTwoShiftUSReg(unsigned size,
589                             ExtMachInst machInst, IntRegIndex dest,
590                             IntRegIndex op1, uint64_t imm)
591    {
592        switch (size) {
593          case 0:
594            return new Base<int8_t>(machInst, dest, op1, imm);
595          case 1:
596            return new Base<int16_t>(machInst, dest, op1, imm);
597          case 2:
598            return new Base<int32_t>(machInst, dest, op1, imm);
599          default:
600            return new Unknown(machInst);
601        }
602    }
603
604    template <template <typename T> class BaseD,
605              template <typename T> class BaseQ>
606    StaticInstPtr
607    decodeNeonSTwoShiftSReg(bool q, unsigned size,
608                            ExtMachInst machInst, IntRegIndex dest,
609                            IntRegIndex op1, uint64_t imm)
610    {
611        if (q) {
612            return decodeNeonSTwoShiftUSReg<BaseQ>(
613                    size, machInst, dest, op1, imm);
614        } else {
615            return decodeNeonSTwoShiftUSReg<BaseD>(
616                    size, machInst, dest, op1, imm);
617        }
618    }
619
620    template <template <typename T> class BaseD,
621              template <typename T> class BaseQ>
622    StaticInstPtr
623    decodeNeonUSTwoShiftSReg(bool q, bool notSigned, unsigned size,
624                             ExtMachInst machInst, IntRegIndex dest,
625                             IntRegIndex op1, uint64_t imm)
626    {
627        if (notSigned) {
628            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
629                    q, size, machInst, dest, op1, imm);
630        } else {
631            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
632                    q, size, machInst, dest, op1, imm);
633        }
634    }
635
636    template <template <typename T> class BaseD,
637              template <typename T> class BaseQ>
638    StaticInstPtr
639    decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
640                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
641    {
642        if (q) {
643            return decodeNeonUTwoShiftUReg<BaseQ>(
644                size, machInst, dest, op1, imm);
645        } else {
646            return decodeNeonUTwoShiftUSReg<BaseD>(
647                size, machInst, dest, op1, imm);
648        }
649    }
650
651    template <template <typename T> class BaseD,
652              template <typename T> class BaseQ>
653    StaticInstPtr
654    decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst,
655                            IntRegIndex dest, IntRegIndex op1, uint64_t imm)
656    {
657        if (q) {
658            return decodeNeonSTwoShiftUReg<BaseQ>(
659                size, machInst, dest, op1, imm);
660        } else {
661            return decodeNeonSTwoShiftUSReg<BaseD>(
662                size, machInst, dest, op1, imm);
663        }
664    }
665
666    template <template <typename T> class Base>
667    StaticInstPtr
668    decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst,
669                              IntRegIndex dest, IntRegIndex op1, uint64_t imm)
670    {
671        if (size)
672            return new Base<uint64_t>(machInst, dest, op1, imm);
673        else
674            return new Base<uint32_t>(machInst, dest, op1, imm);
675    }
676
677    template <template <typename T> class BaseD,
678              template <typename T> class BaseQ>
679    StaticInstPtr
680    decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst,
681                             IntRegIndex dest, IntRegIndex op1, uint64_t imm)
682    {
683        if (q) {
684            if (size)
685                return new BaseQ<uint64_t>(machInst, dest, op1, imm);
686            else
687                return new BaseQ<uint32_t>(machInst, dest, op1, imm);
688        } else {
689            if (size)
690                return new Unknown(machInst);
691            else
692                return new BaseD<uint32_t>(machInst, dest, op1, imm);
693        }
694    }
695
696    template <template <typename T> class Base>
697    StaticInstPtr
698    decodeNeonUTwoMiscUSReg(unsigned size,
699                            ExtMachInst machInst, IntRegIndex dest,
700                            IntRegIndex op1)
701    {
702        switch (size) {
703          case 0:
704            return new Base<uint8_t>(machInst, dest, op1);
705          case 1:
706            return new Base<uint16_t>(machInst, dest, op1);
707          case 2:
708            return new Base<uint32_t>(machInst, dest, op1);
709          default:
710            return new Unknown(machInst);
711        }
712    }
713
714    template <template <typename T> class Base>
715    StaticInstPtr
716    decodeNeonSTwoMiscUSReg(unsigned size,
717                            ExtMachInst machInst, IntRegIndex dest,
718                            IntRegIndex op1)
719    {
720        switch (size) {
721          case 0:
722            return new Base<int8_t>(machInst, dest, op1);
723          case 1:
724            return new Base<int16_t>(machInst, dest, op1);
725          case 2:
726            return new Base<int32_t>(machInst, dest, op1);
727          default:
728            return new Unknown(machInst);
729        }
730    }
731
732    template <template <typename T> class BaseD,
733              template <typename T> class BaseQ>
734    StaticInstPtr
735    decodeNeonUTwoMiscSReg(bool q, unsigned size,
736                           ExtMachInst machInst, IntRegIndex dest,
737                           IntRegIndex op1)
738    {
739        if (q) {
740            return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
741        } else {
742            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
743        }
744    }
745
746    template <template <typename T> class BaseD,
747              template <typename T> class BaseQ>
748    StaticInstPtr
749    decodeNeonSTwoMiscSReg(bool q, unsigned size,
750                           ExtMachInst machInst, IntRegIndex dest,
751                           IntRegIndex op1)
752    {
753        if (q) {
754            return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1);
755        } else {
756            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
757        }
758    }
759
760    template <template <typename T> class Base>
761    StaticInstPtr
762    decodeNeonUTwoMiscUReg(unsigned size,
763                           ExtMachInst machInst, IntRegIndex dest,
764                           IntRegIndex op1)
765    {
766        switch (size) {
767          case 0:
768            return new Base<uint8_t>(machInst, dest, op1);
769          case 1:
770            return new Base<uint16_t>(machInst, dest, op1);
771          case 2:
772            return new Base<uint32_t>(machInst, dest, op1);
773          case 3:
774            return new Base<uint64_t>(machInst, dest, op1);
775          default:
776            return new Unknown(machInst);
777        }
778    }
779
780    template <template <typename T> class Base>
781    StaticInstPtr
782    decodeNeonSTwoMiscUReg(unsigned size,
783                           ExtMachInst machInst, IntRegIndex dest,
784                           IntRegIndex op1)
785    {
786        switch (size) {
787          case 0:
788            return new Base<int8_t>(machInst, dest, op1);
789          case 1:
790            return new Base<int16_t>(machInst, dest, op1);
791          case 2:
792            return new Base<int32_t>(machInst, dest, op1);
793          case 3:
794            return new Base<int64_t>(machInst, dest, op1);
795          default:
796            return new Unknown(machInst);
797        }
798    }
799
800    template <template <typename T> class BaseD,
801              template <typename T> class BaseQ>
802    StaticInstPtr
803    decodeNeonSTwoMiscReg(bool q, unsigned size,
804                          ExtMachInst machInst, IntRegIndex dest,
805                          IntRegIndex op1)
806    {
807        if (q) {
808            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
809        } else {
810            return decodeNeonSTwoMiscUReg<BaseD>(size, machInst, dest, op1);
811        }
812    }
813
814    template <template <typename T> class BaseD,
815              template <typename T> class BaseQ>
816    StaticInstPtr
817    decodeNeonUTwoMiscReg(bool q, unsigned size,
818                          ExtMachInst machInst, IntRegIndex dest,
819                          IntRegIndex op1)
820    {
821        if (q) {
822            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
823        } else {
824            return decodeNeonUTwoMiscUReg<BaseD>(size, machInst, dest, op1);
825        }
826    }
827
828    template <template <typename T> class BaseD,
829              template <typename T> class BaseQ>
830    StaticInstPtr
831    decodeNeonUSTwoMiscSReg(bool q, bool notSigned, unsigned size,
832                            ExtMachInst machInst, IntRegIndex dest,
833                            IntRegIndex op1)
834    {
835        if (notSigned) {
836            return decodeNeonUTwoShiftSReg<BaseD, BaseQ>(
837                    q, size, machInst, dest, op1);
838        } else {
839            return decodeNeonSTwoShiftSReg<BaseD, BaseQ>(
840                    q, size, machInst, dest, op1);
841        }
842    }
843
844    template <template <typename T> class BaseD,
845              template <typename T> class BaseQ>
846    StaticInstPtr
847    decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
848                           IntRegIndex dest, IntRegIndex op1)
849    {
850        if (q) {
851            return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
852        } else {
853            return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
854        }
855    }
856
857    template <template <typename T> class BaseD,
858              template <typename T> class BaseQ>
859    StaticInstPtr
860    decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst,
861                           IntRegIndex dest, IntRegIndex op1)
862    {
863        if (q) {
864            return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1);
865        } else {
866            return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1);
867        }
868    }
869
870    template <template <typename T> class BaseD,
871              template <typename T> class BaseQ>
872    StaticInstPtr
873    decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst,
874                            IntRegIndex dest, IntRegIndex op1)
875    {
876        if (q) {
877            if (size)
878                return new BaseQ<uint64_t>(machInst, dest, op1);
879            else
880                return new BaseQ<uint32_t>(machInst, dest, op1);
881        } else {
882            if (size)
883                return new Unknown(machInst);
884            else
885                return new BaseD<uint32_t>(machInst, dest, op1);
886        }
887    }
888
889    template <template <typename T> class BaseD,
890              template <typename T> class BaseQ>
891    StaticInstPtr
892    decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst,
893                                   IntRegIndex dest, IntRegIndex op1)
894    {
895        if (size)
896            return new BaseQ<uint64_t>(machInst, dest, op1);
897        else
898            return new BaseD<uint32_t>(machInst, dest, op1);
899    }
900
901    template <template <typename T> class Base>
902    StaticInstPtr
903    decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst,
904                              IntRegIndex dest, IntRegIndex op1)
905    {
906        if (size)
907            return new Base<uint64_t>(machInst, dest, op1);
908        else
909            return new Base<uint32_t>(machInst, dest, op1);
910    }
911
912    template <template <typename T> class BaseD,
913              template <typename T> class BaseQ>
914    StaticInstPtr
915    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
916                              IntRegIndex dest, IntRegIndex op1)
917    {
918        if (q) {
919            switch (size) {
920              case 0x0:
921                return new BaseQ<uint8_t>(machInst, dest, op1);
922              case 0x1:
923                return new BaseQ<uint16_t>(machInst, dest, op1);
924              case 0x2:
925                return new BaseQ<uint32_t>(machInst, dest, op1);
926              default:
927                return new Unknown(machInst);
928            }
929        } else {
930            switch (size) {
931              case 0x0:
932                return new BaseD<uint8_t>(machInst, dest, op1);
933              case 0x1:
934                return new BaseD<uint16_t>(machInst, dest, op1);
935              default:
936                return new Unknown(machInst);
937            }
938        }
939    }
940
941    template <template <typename T> class BaseD,
942              template <typename T> class BaseQ,
943              template <typename T> class BaseBQ>
944    StaticInstPtr
945    decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
946                              IntRegIndex dest, IntRegIndex op1)
947    {
948        if (q) {
949            switch (size) {
950              case 0x0:
951                return new BaseQ<uint8_t>(machInst, dest, op1);
952              case 0x1:
953                return new BaseQ<uint16_t>(machInst, dest, op1);
954              case 0x2:
955                return new BaseBQ<uint32_t>(machInst, dest, op1);
956              default:
957                return new Unknown(machInst);
958            }
959        } else {
960            switch (size) {
961              case 0x0:
962                return new BaseD<uint8_t>(machInst, dest, op1);
963              case 0x1:
964                return new BaseD<uint16_t>(machInst, dest, op1);
965              default:
966                return new Unknown(machInst);
967            }
968        }
969    }
970
971    template <template <typename T> class BaseD,
972              template <typename T> class BaseQ>
973    StaticInstPtr
974    decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst,
975                              IntRegIndex dest, IntRegIndex op1)
976    {
977        if (q) {
978            switch (size) {
979              case 0x0:
980                return new BaseQ<int8_t>(machInst, dest, op1);
981              case 0x1:
982                return new BaseQ<int16_t>(machInst, dest, op1);
983              case 0x2:
984                return new BaseQ<int32_t>(machInst, dest, op1);
985              default:
986                return new Unknown(machInst);
987            }
988        } else {
989            switch (size) {
990              case 0x0:
991                return new BaseD<int8_t>(machInst, dest, op1);
992              case 0x1:
993                return new BaseD<int16_t>(machInst, dest, op1);
994              default:
995                return new Unknown(machInst);
996            }
997        }
998    }
999
1000    template <template <typename T> class BaseD,
1001              template <typename T> class BaseQ,
1002              template <typename T> class BaseBQ>
1003    StaticInstPtr
1004    decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1005                                  IntRegIndex dest, IntRegIndex op1)
1006    {
1007        if (q) {
1008            switch (size) {
1009              case 0x0:
1010                return new BaseQ<uint8_t>(machInst, dest, op1);
1011              case 0x1:
1012                return new BaseQ<uint16_t>(machInst, dest, op1);
1013              case 0x2:
1014                return new BaseBQ<uint32_t>(machInst, dest, op1);
1015              default:
1016                return new Unknown(machInst);
1017            }
1018        } else {
1019            switch (size) {
1020              case 0x0:
1021                return new BaseD<uint8_t>(machInst, dest, op1);
1022              case 0x1:
1023                return new BaseD<uint16_t>(machInst, dest, op1);
1024              default:
1025                return new Unknown(machInst);
1026            }
1027        }
1028    }
1029
1030    template <template <typename T> class BaseD,
1031              template <typename T> class BaseQ,
1032              template <typename T> class BaseBQ>
1033    StaticInstPtr
1034    decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst,
1035                                  IntRegIndex dest, IntRegIndex op1)
1036    {
1037        if (q) {
1038            switch (size) {
1039              case 0x0:
1040                return new BaseQ<int8_t>(machInst, dest, op1);
1041              case 0x1:
1042                return new BaseQ<int16_t>(machInst, dest, op1);
1043              case 0x2:
1044                return new BaseBQ<int32_t>(machInst, dest, op1);
1045              default:
1046                return new Unknown(machInst);
1047            }
1048        } else {
1049            switch (size) {
1050              case 0x0:
1051                return new BaseD<int8_t>(machInst, dest, op1);
1052              case 0x1:
1053                return new BaseD<int16_t>(machInst, dest, op1);
1054              default:
1055                return new Unknown(machInst);
1056            }
1057        }
1058    }
1059}};
1060
1061output exec {{
1062    static float
1063    vcgtFunc(float op1, float op2)
1064    {
1065        if (std::isnan(op1) || std::isnan(op2))
1066            return 2.0;
1067        return (op1 > op2) ? 0.0 : 1.0;
1068    }
1069
1070    static float
1071    vcgeFunc(float op1, float op2)
1072    {
1073        if (std::isnan(op1) || std::isnan(op2))
1074            return 2.0;
1075        return (op1 >= op2) ? 0.0 : 1.0;
1076    }
1077
1078    static float
1079    vceqFunc(float op1, float op2)
1080    {
1081        if (isSnan(op1) || isSnan(op2))
1082            return 2.0;
1083        return (op1 == op2) ? 0.0 : 1.0;
1084    }
1085
1086    static float
1087    vcleFunc(float op1, float op2)
1088    {
1089        if (std::isnan(op1) || std::isnan(op2))
1090            return 2.0;
1091        return (op1 <= op2) ? 0.0 : 1.0;
1092    }
1093
1094    static float
1095    vcltFunc(float op1, float op2)
1096    {
1097        if (std::isnan(op1) || std::isnan(op2))
1098            return 2.0;
1099        return (op1 < op2) ? 0.0 : 1.0;
1100    }
1101
1102    static float
1103    vacgtFunc(float op1, float op2)
1104    {
1105        if (std::isnan(op1) || std::isnan(op2))
1106            return 2.0;
1107        return (fabsf(op1) > fabsf(op2)) ? 0.0 : 1.0;
1108    }
1109
1110    static float
1111    vacgeFunc(float op1, float op2)
1112    {
1113        if (std::isnan(op1) || std::isnan(op2))
1114            return 2.0;
1115        return (fabsf(op1) >= fabsf(op2)) ? 0.0 : 1.0;
1116    }
1117}};
1118
1119let {{
1120
1121    header_output = ""
1122    exec_output = ""
1123
1124    smallUnsignedTypes = ("uint8_t", "uint16_t", "uint32_t")
1125    unsignedTypes = smallUnsignedTypes + ("uint64_t",)
1126    smallSignedTypes = ("int8_t", "int16_t", "int32_t")
1127    signedTypes = smallSignedTypes + ("int64_t",)
1128    smallTypes = smallUnsignedTypes + smallSignedTypes
1129    allTypes = unsignedTypes + signedTypes
1130
1131    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
1132                          readDest=False, pairwise=False):
1133        global header_output, exec_output
1134        eWalkCode = simdEnabledCheckCode + '''
1135        RegVect srcReg1, srcReg2, destReg;
1136        '''
1137        for reg in range(rCount):
1138            eWalkCode += '''
1139                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1140                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1141            ''' % { "reg" : reg }
1142            if readDest:
1143                eWalkCode += '''
1144                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1145                ''' % { "reg" : reg }
1146        readDestCode = ''
1147        if readDest:
1148            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1149        if pairwise:
1150            eWalkCode += '''
1151            for (unsigned i = 0; i < eCount; i++) {
1152                Element srcElem1 = gtoh(2 * i < eCount ?
1153                                        srcReg1.elements[2 * i] :
1154                                        srcReg2.elements[2 * i - eCount]);
1155                Element srcElem2 = gtoh(2 * i < eCount ?
1156                                        srcReg1.elements[2 * i + 1] :
1157                                        srcReg2.elements[2 * i + 1 - eCount]);
1158                Element destElem;
1159                %(readDest)s
1160                %(op)s
1161                destReg.elements[i] = htog(destElem);
1162            }
1163            ''' % { "op" : op, "readDest" : readDestCode }
1164        else:
1165            eWalkCode += '''
1166            for (unsigned i = 0; i < eCount; i++) {
1167                Element srcElem1 = gtoh(srcReg1.elements[i]);
1168                Element srcElem2 = gtoh(srcReg2.elements[i]);
1169                Element destElem;
1170                %(readDest)s
1171                %(op)s
1172                destReg.elements[i] = htog(destElem);
1173            }
1174            ''' % { "op" : op, "readDest" : readDestCode }
1175        for reg in range(rCount):
1176            eWalkCode += '''
1177            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1178            ''' % { "reg" : reg }
1179        iop = InstObjParams(name, Name,
1180                            "RegRegRegOp",
1181                            { "code": eWalkCode,
1182                              "r_count": rCount,
1183                              "predicate_test": predicateTest,
1184                              "op_class": opClass }, [])
1185        header_output += NeonRegRegRegOpDeclare.subst(iop)
1186        exec_output += NeonEqualRegExecute.subst(iop)
1187        for type in types:
1188            substDict = { "targs" : type,
1189                          "class_name" : Name }
1190            exec_output += NeonExecDeclare.subst(substDict)
1191
1192    def threeEqualRegInstFp(name, Name, opClass, types, rCount, op,
1193                            readDest=False, pairwise=False, toInt=False):
1194        global header_output, exec_output
1195        eWalkCode = simdEnabledCheckCode + '''
1196        typedef FloatReg FloatVect[rCount];
1197        FloatVect srcRegs1, srcRegs2;
1198        '''
1199        if toInt:
1200            eWalkCode += 'RegVect destRegs;\n'
1201        else:
1202            eWalkCode += 'FloatVect destRegs;\n'
1203        for reg in range(rCount):
1204            eWalkCode += '''
1205                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1206                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1207            ''' % { "reg" : reg }
1208            if readDest:
1209                if toInt:
1210                    eWalkCode += '''
1211                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1212                    ''' % { "reg" : reg }
1213                else:
1214                    eWalkCode += '''
1215                        destRegs[%(reg)d] = FpDestP%(reg)d;
1216                    ''' % { "reg" : reg }
1217        readDestCode = ''
1218        if readDest:
1219            readDestCode = 'destReg = destRegs[r];'
1220        destType = 'FloatReg'
1221        writeDest = 'destRegs[r] = destReg;'
1222        if toInt:
1223            destType = 'FloatRegBits'
1224            writeDest = 'destRegs.regs[r] = destReg;'
1225        if pairwise:
1226            eWalkCode += '''
1227            for (unsigned r = 0; r < rCount; r++) {
1228                FloatReg srcReg1 = (2 * r < rCount) ?
1229                    srcRegs1[2 * r] : srcRegs2[2 * r - rCount];
1230                FloatReg srcReg2 = (2 * r < rCount) ?
1231                    srcRegs1[2 * r + 1] : srcRegs2[2 * r + 1 - rCount];
1232                %(destType)s destReg;
1233                %(readDest)s
1234                %(op)s
1235                %(writeDest)s
1236            }
1237            ''' % { "op" : op,
1238                    "readDest" : readDestCode,
1239                    "destType" : destType,
1240                    "writeDest" : writeDest }
1241        else:
1242            eWalkCode += '''
1243            for (unsigned r = 0; r < rCount; r++) {
1244                FloatReg srcReg1 = srcRegs1[r];
1245                FloatReg srcReg2 = srcRegs2[r];
1246                %(destType)s destReg;
1247                %(readDest)s
1248                %(op)s
1249                %(writeDest)s
1250            }
1251            ''' % { "op" : op,
1252                    "readDest" : readDestCode,
1253                    "destType" : destType,
1254                    "writeDest" : writeDest }
1255        for reg in range(rCount):
1256            if toInt:
1257                eWalkCode += '''
1258                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1259                ''' % { "reg" : reg }
1260            else:
1261                eWalkCode += '''
1262                FpDestP%(reg)d = destRegs[%(reg)d];
1263                ''' % { "reg" : reg }
1264        iop = InstObjParams(name, Name,
1265                            "FpRegRegRegOp",
1266                            { "code": eWalkCode,
1267                              "r_count": rCount,
1268                              "predicate_test": predicateTest,
1269                              "op_class": opClass }, [])
1270        header_output += NeonRegRegRegOpDeclare.subst(iop)
1271        exec_output += NeonEqualRegExecute.subst(iop)
1272        for type in types:
1273            substDict = { "targs" : type,
1274                          "class_name" : Name }
1275            exec_output += NeonExecDeclare.subst(substDict)
1276
1277    def threeUnequalRegInst(name, Name, opClass, types, op,
1278                            bigSrc1, bigSrc2, bigDest, readDest):
1279        global header_output, exec_output
1280        src1Cnt = src2Cnt = destCnt = 2
1281        src1Prefix = src2Prefix = destPrefix = ''
1282        if bigSrc1:
1283            src1Cnt = 4
1284            src1Prefix = 'Big'
1285        if bigSrc2:
1286            src2Cnt = 4
1287            src2Prefix = 'Big'
1288        if bigDest:
1289            destCnt = 4
1290            destPrefix = 'Big'
1291        eWalkCode = simdEnabledCheckCode + '''
1292            %sRegVect srcReg1;
1293            %sRegVect srcReg2;
1294            %sRegVect destReg;
1295        ''' % (src1Prefix, src2Prefix, destPrefix)
1296        for reg in range(src1Cnt):
1297            eWalkCode += '''
1298                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1299            ''' % { "reg" : reg }
1300        for reg in range(src2Cnt):
1301            eWalkCode += '''
1302                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1303            ''' % { "reg" : reg }
1304        if readDest:
1305            for reg in range(destCnt):
1306                eWalkCode += '''
1307                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1308                ''' % { "reg" : reg }
1309        readDestCode = ''
1310        if readDest:
1311            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1312        eWalkCode += '''
1313        for (unsigned i = 0; i < eCount; i++) {
1314            %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]);
1315            %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[i]);
1316            %(destPrefix)sElement destElem;
1317            %(readDest)s
1318            %(op)s
1319            destReg.elements[i] = htog(destElem);
1320        }
1321        ''' % { "op" : op, "readDest" : readDestCode,
1322                "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix,
1323                "destPrefix" : destPrefix }
1324        for reg in range(destCnt):
1325            eWalkCode += '''
1326            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1327            ''' % { "reg" : reg }
1328        iop = InstObjParams(name, Name,
1329                            "RegRegRegOp",
1330                            { "code": eWalkCode,
1331                              "r_count": 2,
1332                              "predicate_test": predicateTest,
1333                              "op_class": opClass }, [])
1334        header_output += NeonRegRegRegOpDeclare.subst(iop)
1335        exec_output += NeonUnequalRegExecute.subst(iop)
1336        for type in types:
1337            substDict = { "targs" : type,
1338                          "class_name" : Name }
1339            exec_output += NeonExecDeclare.subst(substDict)
1340
1341    def threeRegNarrowInst(name, Name, opClass, types, op, readDest=False):
1342        threeUnequalRegInst(name, Name, opClass, types, op,
1343                            True, True, False, readDest)
1344
1345    def threeRegLongInst(name, Name, opClass, types, op, readDest=False):
1346        threeUnequalRegInst(name, Name, opClass, types, op,
1347                            False, False, True, readDest)
1348
1349    def threeRegWideInst(name, Name, opClass, types, op, readDest=False):
1350        threeUnequalRegInst(name, Name, opClass, types, op,
1351                            True, False, True, readDest)
1352
1353    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
1354        global header_output, exec_output
1355        eWalkCode = simdEnabledCheckCode + '''
1356        RegVect srcReg1, srcReg2, destReg;
1357        '''
1358        for reg in range(rCount):
1359            eWalkCode += '''
1360                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1361                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
1362            ''' % { "reg" : reg }
1363            if readDest:
1364                eWalkCode += '''
1365                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1366                ''' % { "reg" : reg }
1367        readDestCode = ''
1368        if readDest:
1369            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1370        eWalkCode += '''
1371        if (imm < 0 && imm >= eCount) {
1372            fault = new UndefinedInstruction(machInst, false, mnemonic);
1373        } else {
1374            for (unsigned i = 0; i < eCount; i++) {
1375                Element srcElem1 = gtoh(srcReg1.elements[i]);
1376                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1377                Element destElem;
1378                %(readDest)s
1379                %(op)s
1380                destReg.elements[i] = htog(destElem);
1381            }
1382        }
1383        ''' % { "op" : op, "readDest" : readDestCode }
1384        for reg in range(rCount):
1385            eWalkCode += '''
1386            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1387            ''' % { "reg" : reg }
1388        iop = InstObjParams(name, Name,
1389                            "RegRegRegImmOp",
1390                            { "code": eWalkCode,
1391                              "r_count": rCount,
1392                              "predicate_test": predicateTest,
1393                              "op_class": opClass }, [])
1394        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1395        exec_output += NeonEqualRegExecute.subst(iop)
1396        for type in types:
1397            substDict = { "targs" : type,
1398                          "class_name" : Name }
1399            exec_output += NeonExecDeclare.subst(substDict)
1400
1401    def twoRegLongInst(name, Name, opClass, types, op, readDest=False):
1402        global header_output, exec_output
1403        rCount = 2
1404        eWalkCode = simdEnabledCheckCode + '''
1405        RegVect srcReg1, srcReg2;
1406        BigRegVect destReg;
1407        '''
1408        for reg in range(rCount):
1409            eWalkCode += '''
1410                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1411                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);;
1412            ''' % { "reg" : reg }
1413        if readDest:
1414            for reg in range(2 * rCount):
1415                eWalkCode += '''
1416                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1417                ''' % { "reg" : reg }
1418        readDestCode = ''
1419        if readDest:
1420            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1421        eWalkCode += '''
1422        if (imm < 0 && imm >= eCount) {
1423            fault = new UndefinedInstruction(machInst, false, mnemonic);
1424        } else {
1425            for (unsigned i = 0; i < eCount; i++) {
1426                Element srcElem1 = gtoh(srcReg1.elements[i]);
1427                Element srcElem2 = gtoh(srcReg2.elements[imm]);
1428                BigElement destElem;
1429                %(readDest)s
1430                %(op)s
1431                destReg.elements[i] = htog(destElem);
1432            }
1433        }
1434        ''' % { "op" : op, "readDest" : readDestCode }
1435        for reg in range(2 * rCount):
1436            eWalkCode += '''
1437            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1438            ''' % { "reg" : reg }
1439        iop = InstObjParams(name, Name,
1440                            "RegRegRegImmOp",
1441                            { "code": eWalkCode,
1442                              "r_count": rCount,
1443                              "predicate_test": predicateTest,
1444                              "op_class": opClass }, [])
1445        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1446        exec_output += NeonUnequalRegExecute.subst(iop)
1447        for type in types:
1448            substDict = { "targs" : type,
1449                          "class_name" : Name }
1450            exec_output += NeonExecDeclare.subst(substDict)
1451
1452    def twoEqualRegInstFp(name, Name, opClass, types, rCount, op, readDest=False):
1453        global header_output, exec_output
1454        eWalkCode = simdEnabledCheckCode + '''
1455        typedef FloatReg FloatVect[rCount];
1456        FloatVect srcRegs1, srcRegs2, destRegs;
1457        '''
1458        for reg in range(rCount):
1459            eWalkCode += '''
1460                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1461                srcRegs2[%(reg)d] = FpOp2P%(reg)d;
1462            ''' % { "reg" : reg }
1463            if readDest:
1464                eWalkCode += '''
1465                    destRegs[%(reg)d] = FpDestP%(reg)d;
1466                ''' % { "reg" : reg }
1467        readDestCode = ''
1468        if readDest:
1469            readDestCode = 'destReg = destRegs[i];'
1470        eWalkCode += '''
1471        if (imm < 0 && imm >= eCount) {
1472            fault = new UndefinedInstruction(machInst, false, mnemonic);
1473        } else {
1474            for (unsigned i = 0; i < rCount; i++) {
1475                FloatReg srcReg1 = srcRegs1[i];
1476                FloatReg srcReg2 = srcRegs2[imm];
1477                FloatReg destReg;
1478                %(readDest)s
1479                %(op)s
1480                destRegs[i] = destReg;
1481            }
1482        }
1483        ''' % { "op" : op, "readDest" : readDestCode }
1484        for reg in range(rCount):
1485            eWalkCode += '''
1486            FpDestP%(reg)d = destRegs[%(reg)d];
1487            ''' % { "reg" : reg }
1488        iop = InstObjParams(name, Name,
1489                            "FpRegRegRegImmOp",
1490                            { "code": eWalkCode,
1491                              "r_count": rCount,
1492                              "predicate_test": predicateTest,
1493                              "op_class": opClass }, [])
1494        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
1495        exec_output += NeonEqualRegExecute.subst(iop)
1496        for type in types:
1497            substDict = { "targs" : type,
1498                          "class_name" : Name }
1499            exec_output += NeonExecDeclare.subst(substDict)
1500
1501    def twoRegShiftInst(name, Name, opClass, types, rCount, op,
1502            readDest=False, toInt=False, fromInt=False):
1503        global header_output, exec_output
1504        eWalkCode = simdEnabledCheckCode + '''
1505        RegVect srcRegs1, destRegs;
1506        '''
1507        for reg in range(rCount):
1508            eWalkCode += '''
1509                srcRegs1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1510            ''' % { "reg" : reg }
1511            if readDest:
1512                eWalkCode += '''
1513                    destRegs.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1514                ''' % { "reg" : reg }
1515        readDestCode = ''
1516        if readDest:
1517            readDestCode = 'destElem = gtoh(destRegs.elements[i]);'
1518            if toInt:
1519                readDestCode = 'destReg = gtoh(destRegs.regs[i]);'
1520        readOpCode = 'Element srcElem1 = gtoh(srcRegs1.elements[i]);'
1521        if fromInt:
1522            readOpCode = 'FloatRegBits srcReg1 = gtoh(srcRegs1.regs[i]);'
1523        declDest = 'Element destElem;'
1524        writeDestCode = 'destRegs.elements[i] = htog(destElem);'
1525        if toInt:
1526            declDest = 'FloatRegBits destReg;'
1527            writeDestCode = 'destRegs.regs[i] = htog(destReg);'
1528        eWalkCode += '''
1529        for (unsigned i = 0; i < eCount; i++) {
1530            %(readOp)s
1531            %(declDest)s
1532            %(readDest)s
1533            %(op)s
1534            %(writeDest)s
1535        }
1536        ''' % { "readOp" : readOpCode,
1537                "declDest" : declDest,
1538                "readDest" : readDestCode,
1539                "op" : op,
1540                "writeDest" : writeDestCode }
1541        for reg in range(rCount):
1542            eWalkCode += '''
1543            FpDestP%(reg)d_uw = gtoh(destRegs.regs[%(reg)d]);
1544            ''' % { "reg" : reg }
1545        iop = InstObjParams(name, Name,
1546                            "RegRegImmOp",
1547                            { "code": eWalkCode,
1548                              "r_count": rCount,
1549                              "predicate_test": predicateTest,
1550                              "op_class": opClass }, [])
1551        header_output += NeonRegRegImmOpDeclare.subst(iop)
1552        exec_output += NeonEqualRegExecute.subst(iop)
1553        for type in types:
1554            substDict = { "targs" : type,
1555                          "class_name" : Name }
1556            exec_output += NeonExecDeclare.subst(substDict)
1557
1558    def twoRegNarrowShiftInst(name, Name, opClass, types, op, readDest=False):
1559        global header_output, exec_output
1560        eWalkCode = simdEnabledCheckCode + '''
1561        BigRegVect srcReg1;
1562        RegVect destReg;
1563        '''
1564        for reg in range(4):
1565            eWalkCode += '''
1566                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1567            ''' % { "reg" : reg }
1568        if readDest:
1569            for reg in range(2):
1570                eWalkCode += '''
1571                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1572                ''' % { "reg" : reg }
1573        readDestCode = ''
1574        if readDest:
1575            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1576        eWalkCode += '''
1577        for (unsigned i = 0; i < eCount; i++) {
1578            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1579            Element destElem;
1580            %(readDest)s
1581            %(op)s
1582            destReg.elements[i] = htog(destElem);
1583        }
1584        ''' % { "op" : op, "readDest" : readDestCode }
1585        for reg in range(2):
1586            eWalkCode += '''
1587            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1588            ''' % { "reg" : reg }
1589        iop = InstObjParams(name, Name,
1590                            "RegRegImmOp",
1591                            { "code": eWalkCode,
1592                              "r_count": 2,
1593                              "predicate_test": predicateTest,
1594                              "op_class": opClass }, [])
1595        header_output += NeonRegRegImmOpDeclare.subst(iop)
1596        exec_output += NeonUnequalRegExecute.subst(iop)
1597        for type in types:
1598            substDict = { "targs" : type,
1599                          "class_name" : Name }
1600            exec_output += NeonExecDeclare.subst(substDict)
1601
1602    def twoRegLongShiftInst(name, Name, opClass, types, op, readDest=False):
1603        global header_output, exec_output
1604        eWalkCode = simdEnabledCheckCode + '''
1605        RegVect srcReg1;
1606        BigRegVect destReg;
1607        '''
1608        for reg in range(2):
1609            eWalkCode += '''
1610                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1611            ''' % { "reg" : reg }
1612        if readDest:
1613            for reg in range(4):
1614                eWalkCode += '''
1615                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1616                ''' % { "reg" : reg }
1617        readDestCode = ''
1618        if readDest:
1619            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1620        eWalkCode += '''
1621        for (unsigned i = 0; i < eCount; i++) {
1622            Element srcElem1 = gtoh(srcReg1.elements[i]);
1623            BigElement destElem;
1624            %(readDest)s
1625            %(op)s
1626            destReg.elements[i] = htog(destElem);
1627        }
1628        ''' % { "op" : op, "readDest" : readDestCode }
1629        for reg in range(4):
1630            eWalkCode += '''
1631            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1632            ''' % { "reg" : reg }
1633        iop = InstObjParams(name, Name,
1634                            "RegRegImmOp",
1635                            { "code": eWalkCode,
1636                              "r_count": 2,
1637                              "predicate_test": predicateTest,
1638                              "op_class": opClass }, [])
1639        header_output += NeonRegRegImmOpDeclare.subst(iop)
1640        exec_output += NeonUnequalRegExecute.subst(iop)
1641        for type in types:
1642            substDict = { "targs" : type,
1643                          "class_name" : Name }
1644            exec_output += NeonExecDeclare.subst(substDict)
1645
1646    def twoRegMiscInst(name, Name, opClass, types, rCount, op, readDest=False):
1647        global header_output, exec_output
1648        eWalkCode = simdEnabledCheckCode + '''
1649        RegVect srcReg1, destReg;
1650        '''
1651        for reg in range(rCount):
1652            eWalkCode += '''
1653                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1654            ''' % { "reg" : reg }
1655            if readDest:
1656                eWalkCode += '''
1657                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1658                ''' % { "reg" : reg }
1659        readDestCode = ''
1660        if readDest:
1661            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1662        eWalkCode += '''
1663        for (unsigned i = 0; i < eCount; i++) {
1664            unsigned j = i;
1665            Element srcElem1 = gtoh(srcReg1.elements[i]);
1666            Element destElem;
1667            %(readDest)s
1668            %(op)s
1669            destReg.elements[j] = htog(destElem);
1670        }
1671        ''' % { "op" : op, "readDest" : readDestCode }
1672        for reg in range(rCount):
1673            eWalkCode += '''
1674            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1675            ''' % { "reg" : reg }
1676        iop = InstObjParams(name, Name,
1677                            "RegRegOp",
1678                            { "code": eWalkCode,
1679                              "r_count": rCount,
1680                              "predicate_test": predicateTest,
1681                              "op_class": opClass }, [])
1682        header_output += NeonRegRegOpDeclare.subst(iop)
1683        exec_output += NeonEqualRegExecute.subst(iop)
1684        for type in types:
1685            substDict = { "targs" : type,
1686                          "class_name" : Name }
1687            exec_output += NeonExecDeclare.subst(substDict)
1688
1689    def twoRegMiscScInst(name, Name, opClass, types, rCount, op, readDest=False):
1690        global header_output, exec_output
1691        eWalkCode = simdEnabledCheckCode + '''
1692        RegVect srcReg1, destReg;
1693        '''
1694        for reg in range(rCount):
1695            eWalkCode += '''
1696                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1697            ''' % { "reg" : reg }
1698            if readDest:
1699                eWalkCode += '''
1700                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1701                ''' % { "reg" : reg }
1702        readDestCode = ''
1703        if readDest:
1704            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1705        eWalkCode += '''
1706        for (unsigned i = 0; i < eCount; i++) {
1707            Element srcElem1 = gtoh(srcReg1.elements[imm]);
1708            Element destElem;
1709            %(readDest)s
1710            %(op)s
1711            destReg.elements[i] = htog(destElem);
1712        }
1713        ''' % { "op" : op, "readDest" : readDestCode }
1714        for reg in range(rCount):
1715            eWalkCode += '''
1716            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1717            ''' % { "reg" : reg }
1718        iop = InstObjParams(name, Name,
1719                            "RegRegImmOp",
1720                            { "code": eWalkCode,
1721                              "r_count": rCount,
1722                              "predicate_test": predicateTest,
1723                              "op_class": opClass }, [])
1724        header_output += NeonRegRegImmOpDeclare.subst(iop)
1725        exec_output += NeonEqualRegExecute.subst(iop)
1726        for type in types:
1727            substDict = { "targs" : type,
1728                          "class_name" : Name }
1729            exec_output += NeonExecDeclare.subst(substDict)
1730
1731    def twoRegMiscScramble(name, Name, opClass, types, rCount, op, readDest=False):
1732        global header_output, exec_output
1733        eWalkCode = simdEnabledCheckCode + '''
1734        RegVect srcReg1, destReg;
1735        '''
1736        for reg in range(rCount):
1737            eWalkCode += '''
1738                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1739                destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1740            ''' % { "reg" : reg }
1741            if readDest:
1742                eWalkCode += '''
1743                ''' % { "reg" : reg }
1744        readDestCode = ''
1745        if readDest:
1746            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1747        eWalkCode += op
1748        for reg in range(rCount):
1749            eWalkCode += '''
1750            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1751            FpOp1P%(reg)d_uw = gtoh(srcReg1.regs[%(reg)d]);
1752            ''' % { "reg" : reg }
1753        iop = InstObjParams(name, Name,
1754                            "RegRegOp",
1755                            { "code": eWalkCode,
1756                              "r_count": rCount,
1757                              "predicate_test": predicateTest,
1758                              "op_class": opClass }, [])
1759        header_output += NeonRegRegOpDeclare.subst(iop)
1760        exec_output += NeonEqualRegExecute.subst(iop)
1761        for type in types:
1762            substDict = { "targs" : type,
1763                          "class_name" : Name }
1764            exec_output += NeonExecDeclare.subst(substDict)
1765
1766    def twoRegMiscInstFp(name, Name, opClass, types, rCount, op,
1767            readDest=False, toInt=False):
1768        global header_output, exec_output
1769        eWalkCode = simdEnabledCheckCode + '''
1770        typedef FloatReg FloatVect[rCount];
1771        FloatVect srcRegs1;
1772        '''
1773        if toInt:
1774            eWalkCode += 'RegVect destRegs;\n'
1775        else:
1776            eWalkCode += 'FloatVect destRegs;\n'
1777        for reg in range(rCount):
1778            eWalkCode += '''
1779                srcRegs1[%(reg)d] = FpOp1P%(reg)d;
1780            ''' % { "reg" : reg }
1781            if readDest:
1782                if toInt:
1783                    eWalkCode += '''
1784                        destRegs.regs[%(reg)d] = FpDestP%(reg)d.bits;
1785                    ''' % { "reg" : reg }
1786                else:
1787                    eWalkCode += '''
1788                        destRegs[%(reg)d] = FpDestP%(reg)d;
1789                    ''' % { "reg" : reg }
1790        readDestCode = ''
1791        if readDest:
1792            readDestCode = 'destReg = destRegs[i];'
1793        destType = 'FloatReg'
1794        writeDest = 'destRegs[r] = destReg;'
1795        if toInt:
1796            destType = 'FloatRegBits'
1797            writeDest = 'destRegs.regs[r] = destReg;'
1798        eWalkCode += '''
1799        for (unsigned r = 0; r < rCount; r++) {
1800            FloatReg srcReg1 = srcRegs1[r];
1801            %(destType)s destReg;
1802            %(readDest)s
1803            %(op)s
1804            %(writeDest)s
1805        }
1806        ''' % { "op" : op,
1807                "readDest" : readDestCode,
1808                "destType" : destType,
1809                "writeDest" : writeDest }
1810        for reg in range(rCount):
1811            if toInt:
1812                eWalkCode += '''
1813                FpDestP%(reg)d_uw = destRegs.regs[%(reg)d];
1814                ''' % { "reg" : reg }
1815            else:
1816                eWalkCode += '''
1817                FpDestP%(reg)d = destRegs[%(reg)d];
1818                ''' % { "reg" : reg }
1819        iop = InstObjParams(name, Name,
1820                            "FpRegRegOp",
1821                            { "code": eWalkCode,
1822                              "r_count": rCount,
1823                              "predicate_test": predicateTest,
1824                              "op_class": opClass }, [])
1825        header_output += NeonRegRegOpDeclare.subst(iop)
1826        exec_output += NeonEqualRegExecute.subst(iop)
1827        for type in types:
1828            substDict = { "targs" : type,
1829                          "class_name" : Name }
1830            exec_output += NeonExecDeclare.subst(substDict)
1831
1832    def twoRegCondenseInst(name, Name, opClass, types, rCount, op, readDest=False):
1833        global header_output, exec_output
1834        eWalkCode = simdEnabledCheckCode + '''
1835        RegVect srcRegs;
1836        BigRegVect destReg;
1837        '''
1838        for reg in range(rCount):
1839            eWalkCode += '''
1840                srcRegs.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1841            ''' % { "reg" : reg }
1842            if readDest:
1843                eWalkCode += '''
1844                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1845                ''' % { "reg" : reg }
1846        readDestCode = ''
1847        if readDest:
1848            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1849        eWalkCode += '''
1850        for (unsigned i = 0; i < eCount / 2; i++) {
1851            Element srcElem1 = gtoh(srcRegs.elements[2 * i]);
1852            Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]);
1853            BigElement destElem;
1854            %(readDest)s
1855            %(op)s
1856            destReg.elements[i] = htog(destElem);
1857        }
1858        ''' % { "op" : op, "readDest" : readDestCode }
1859        for reg in range(rCount):
1860            eWalkCode += '''
1861            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1862            ''' % { "reg" : reg }
1863        iop = InstObjParams(name, Name,
1864                            "RegRegOp",
1865                            { "code": eWalkCode,
1866                              "r_count": rCount,
1867                              "predicate_test": predicateTest,
1868                              "op_class": opClass }, [])
1869        header_output += NeonRegRegOpDeclare.subst(iop)
1870        exec_output += NeonUnequalRegExecute.subst(iop)
1871        for type in types:
1872            substDict = { "targs" : type,
1873                          "class_name" : Name }
1874            exec_output += NeonExecDeclare.subst(substDict)
1875
1876    def twoRegNarrowMiscInst(name, Name, opClass, types, op, readDest=False):
1877        global header_output, exec_output
1878        eWalkCode = simdEnabledCheckCode + '''
1879        BigRegVect srcReg1;
1880        RegVect destReg;
1881        '''
1882        for reg in range(4):
1883            eWalkCode += '''
1884                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1885            ''' % { "reg" : reg }
1886        if readDest:
1887            for reg in range(2):
1888                eWalkCode += '''
1889                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1890                ''' % { "reg" : reg }
1891        readDestCode = ''
1892        if readDest:
1893            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1894        eWalkCode += '''
1895        for (unsigned i = 0; i < eCount; i++) {
1896            BigElement srcElem1 = gtoh(srcReg1.elements[i]);
1897            Element destElem;
1898            %(readDest)s
1899            %(op)s
1900            destReg.elements[i] = htog(destElem);
1901        }
1902        ''' % { "op" : op, "readDest" : readDestCode }
1903        for reg in range(2):
1904            eWalkCode += '''
1905            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1906            ''' % { "reg" : reg }
1907        iop = InstObjParams(name, Name,
1908                            "RegRegOp",
1909                            { "code": eWalkCode,
1910                              "r_count": 2,
1911                              "predicate_test": predicateTest,
1912                              "op_class": opClass }, [])
1913        header_output += NeonRegRegOpDeclare.subst(iop)
1914        exec_output += NeonUnequalRegExecute.subst(iop)
1915        for type in types:
1916            substDict = { "targs" : type,
1917                          "class_name" : Name }
1918            exec_output += NeonExecDeclare.subst(substDict)
1919
1920    def oneRegImmInst(name, Name, opClass, types, rCount, op, readDest=False):
1921        global header_output, exec_output
1922        eWalkCode = simdEnabledCheckCode + '''
1923        RegVect destReg;
1924        '''
1925        if readDest:
1926            for reg in range(rCount):
1927                eWalkCode += '''
1928                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1929                ''' % { "reg" : reg }
1930        readDestCode = ''
1931        if readDest:
1932            readDestCode = 'destElem = gtoh(destReg.elements[i]);'
1933        eWalkCode += '''
1934        for (unsigned i = 0; i < eCount; i++) {
1935            Element destElem;
1936            %(readDest)s
1937            %(op)s
1938            destReg.elements[i] = htog(destElem);
1939        }
1940        ''' % { "op" : op, "readDest" : readDestCode }
1941        for reg in range(rCount):
1942            eWalkCode += '''
1943            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1944            ''' % { "reg" : reg }
1945        iop = InstObjParams(name, Name,
1946                            "RegImmOp",
1947                            { "code": eWalkCode,
1948                              "r_count": rCount,
1949                              "predicate_test": predicateTest,
1950                              "op_class": opClass }, [])
1951        header_output += NeonRegImmOpDeclare.subst(iop)
1952        exec_output += NeonEqualRegExecute.subst(iop)
1953        for type in types:
1954            substDict = { "targs" : type,
1955                          "class_name" : Name }
1956            exec_output += NeonExecDeclare.subst(substDict)
1957
1958    def twoRegLongMiscInst(name, Name, opClass, types, op, readDest=False):
1959        global header_output, exec_output
1960        eWalkCode = simdEnabledCheckCode + '''
1961        RegVect srcReg1;
1962        BigRegVect destReg;
1963        '''
1964        for reg in range(2):
1965            eWalkCode += '''
1966                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
1967            ''' % { "reg" : reg }
1968        if readDest:
1969            for reg in range(4):
1970                eWalkCode += '''
1971                    destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
1972                ''' % { "reg" : reg }
1973        readDestCode = ''
1974        if readDest:
1975            readDestCode = 'destReg = gtoh(destReg.elements[i]);'
1976        eWalkCode += '''
1977        for (unsigned i = 0; i < eCount; i++) {
1978            Element srcElem1 = gtoh(srcReg1.elements[i]);
1979            BigElement destElem;
1980            %(readDest)s
1981            %(op)s
1982            destReg.elements[i] = htog(destElem);
1983        }
1984        ''' % { "op" : op, "readDest" : readDestCode }
1985        for reg in range(4):
1986            eWalkCode += '''
1987            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
1988            ''' % { "reg" : reg }
1989        iop = InstObjParams(name, Name,
1990                            "RegRegOp",
1991                            { "code": eWalkCode,
1992                              "r_count": 2,
1993                              "predicate_test": predicateTest,
1994                              "op_class": opClass }, [])
1995        header_output += NeonRegRegOpDeclare.subst(iop)
1996        exec_output += NeonUnequalRegExecute.subst(iop)
1997        for type in types:
1998            substDict = { "targs" : type,
1999                          "class_name" : Name }
2000            exec_output += NeonExecDeclare.subst(substDict)
2001
    # Halving add. Each operand has its low bit masked off and is halved on
    # its own, so the full-width sum is never formed; carryBit restores the
    # 1 that is lost when both low bits are set.
    vhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1)) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    # Registered twice, with 2 and 4 as the fifth argument and a D / Q class
    # name. NOTE(review): presumably the register count, as rCount in the
    # helpers of this file; threeEqualRegInst is defined outside this chunk.
    threeEqualRegInst("vhadd", "VhaddD", "SimdAddOp", allTypes, 2, vhaddCode)
    threeEqualRegInst("vhadd", "VhaddQ", "SimdAddOp", allTypes, 4, vhaddCode)

    # Rounding halving add: same as vhaddCode except that 1 is added to the
    # two low bits before they are halved, so carryBit is set whenever at
    # least one low bit is set.
    vrhaddCode = '''
        Element carryBit =
            (((unsigned)srcElem1 & 0x1) +
             ((unsigned)srcElem2 & 0x1) + 1) >> 1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) +
                    ((srcElem2 & ~(Element)1) / 2)) + carryBit;
    '''
    threeEqualRegInst("vrhadd", "VrhaddD", "SimdAddOp", allTypes, 2, vrhaddCode)
    threeEqualRegInst("vrhadd", "VrhaddQ", "SimdAddOp", allTypes, 4, vrhaddCode)

    # Halving subtract. barrowBit (sic) is the borrow out of the low bits:
    # it is 1 only when the low bit of srcElem1 is 0 and that of srcElem2
    # is 1, and is subtracted from the difference of the halved operands.
    vhsubCode = '''
        Element barrowBit =
            (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
        // Use division instead of a shift to ensure the sign extension works
        // right. The compiler will figure out if it can be a shift. Mask the
        // inputs so they get truncated correctly.
        destElem = (((srcElem1 & ~(Element)1) / 2) -
                    ((srcElem2 & ~(Element)1) / 2)) - barrowBit;
    '''
    threeEqualRegInst("vhsub", "VhsubD", "SimdAddOp", allTypes, 2, vhsubCode)
    threeEqualRegInst("vhsub", "VhsubQ", "SimdAddOp", allTypes, 4, vhsubCode)
2039
    # Bitwise operations. All of them are registered for unsignedTypes only.

    # AND of the two sources.
    vandCode = '''
        destElem = srcElem1 & srcElem2;
    '''
    threeEqualRegInst("vand", "VandD", "SimdAluOp", unsignedTypes, 2, vandCode)
    threeEqualRegInst("vand", "VandQ", "SimdAluOp", unsignedTypes, 4, vandCode)

    # Bit clear: srcElem1 with the bits set in srcElem2 cleared.
    vbicCode = '''
        destElem = srcElem1 & ~srcElem2;
    '''
    threeEqualRegInst("vbic", "VbicD", "SimdAluOp", unsignedTypes, 2, vbicCode)
    threeEqualRegInst("vbic", "VbicQ", "SimdAluOp", unsignedTypes, 4, vbicCode)

    # OR of the two sources.
    vorrCode = '''
        destElem = srcElem1 | srcElem2;
    '''
    threeEqualRegInst("vorr", "VorrD", "SimdAluOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vorr", "VorrQ", "SimdAluOp", unsignedTypes, 4, vorrCode)

    # vmov reuses the OR snippet under the SimdMiscOp class.
    # NOTE(review): presumably both operands name the same register when
    # these classes are decoded, which makes the OR a move - confirm against
    # the decoder.
    threeEqualRegInst("vmov", "VmovD", "SimdMiscOp", unsignedTypes, 2, vorrCode)
    threeEqualRegInst("vmov", "VmovQ", "SimdMiscOp", unsignedTypes, 4, vorrCode)

    # OR with the complement of the second source.
    vornCode = '''
        destElem = srcElem1 | ~srcElem2;
    '''
    threeEqualRegInst("vorn", "VornD", "SimdAluOp", unsignedTypes, 2, vornCode)
    threeEqualRegInst("vorn", "VornQ", "SimdAluOp", unsignedTypes, 4, vornCode)

    # Exclusive OR of the two sources.
    veorCode = '''
        destElem = srcElem1 ^ srcElem2;
    '''
    threeEqualRegInst("veor", "VeorD", "SimdAluOp", unsignedTypes, 2, veorCode)
    threeEqualRegInst("veor", "VeorQ", "SimdAluOp", unsignedTypes, 4, veorCode)

    # The three snippets below read the previous destElem, and their
    # registrations pass an extra positional True.
    # NOTE(review): presumably that is threeEqualRegInst's readDest flag
    # (the function is defined outside this chunk) - confirm.

    # Insert if false: keep destElem where srcElem2 has a 1, take srcElem1
    # where it has a 0.
    vbifCode = '''
        destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);
    '''
    threeEqualRegInst("vbif", "VbifD", "SimdAluOp", unsignedTypes, 2, vbifCode, True)
    threeEqualRegInst("vbif", "VbifQ", "SimdAluOp", unsignedTypes, 4, vbifCode, True)
    # Insert if true: take srcElem1 where srcElem2 has a 1, keep destElem
    # where it has a 0.
    vbitCode = '''
        destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);
    '''
    threeEqualRegInst("vbit", "VbitD", "SimdAluOp", unsignedTypes, 2, vbitCode, True)
    threeEqualRegInst("vbit", "VbitQ", "SimdAluOp", unsignedTypes, 4, vbitCode, True)
    # Select: the old destElem is the mask; 1 bits pick srcElem1, 0 bits
    # pick srcElem2.
    vbslCode = '''
        destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);
    '''
    threeEqualRegInst("vbsl", "VbslD", "SimdAluOp", unsignedTypes, 2, vbslCode, True)
    threeEqualRegInst("vbsl", "VbslQ", "SimdAluOp", unsignedTypes, 4, vbslCode, True)
2088
    # Element-wise maximum; srcElem2 is chosen when the operands are equal.
    vmaxCode = '''
        destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmax", "VmaxD", "SimdCmpOp", allTypes, 2, vmaxCode)
    threeEqualRegInst("vmax", "VmaxQ", "SimdCmpOp", allTypes, 4, vmaxCode)

    # Element-wise minimum; srcElem2 is chosen when the operands are equal.
    vminCode = '''
        destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;
    '''
    threeEqualRegInst("vmin", "VminD", "SimdCmpOp", allTypes, 2, vminCode)
    threeEqualRegInst("vmin", "VminQ", "SimdCmpOp", allTypes, 4, vminCode)
2100
    # Plain element-wise add, registered for unsignedTypes only.
    vaddCode = '''
        destElem = srcElem1 + srcElem2;
    '''
    threeEqualRegInst("vadd", "NVaddD", "SimdAddOp", unsignedTypes, 2, vaddCode)
    threeEqualRegInst("vadd", "NVaddQ", "SimdAddOp", unsignedTypes, 4, vaddCode)

    # vpadd reuses the add snippet with pairwise=True.
    # NOTE(review): the meaning of pairwise is defined by threeEqualRegInst,
    # which is outside this chunk.
    threeEqualRegInst("vpadd", "NVpaddD", "SimdAddOp", smallUnsignedTypes,
                      2, vaddCode, pairwise=True)
    # Long / wide add: both operands are converted to BigElement before the
    # addition. The same snippet serves vaddl and vaddw.
    vaddlwCode = '''
        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
    '''
    threeRegLongInst("vaddl", "Vaddl", "SimdAddOp", smallTypes, vaddlwCode)
    threeRegWideInst("vaddw", "Vaddw", "SimdAddOp", smallTypes, vaddlwCode)
    # Add and narrow to the high half: the BigElement sum is shifted right
    # by the bit width of Element.
    vaddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vaddhn", "Vaddhn", "SimdAddOp", smallTypes, vaddhnCode)
    # Rounding variant of vaddhn: 1 << (bit width of Element - 1) is added
    # to the sum before the shift.
    vraddhnCode = '''
        destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vraddhn", "Vraddhn", "SimdAddOp", smallTypes, vraddhnCode)
2125
    # Plain element-wise subtract, registered for unsignedTypes only.
    vsubCode = '''
        destElem = srcElem1 - srcElem2;
    '''
    threeEqualRegInst("vsub", "NVsubD", "SimdAddOp", unsignedTypes, 2, vsubCode)
    threeEqualRegInst("vsub", "NVsubQ", "SimdAddOp", unsignedTypes, 4, vsubCode)
    # Long / wide subtract: both operands are converted to BigElement before
    # the subtraction. The same snippet serves vsubl and vsubw.
    vsublwCode = '''
        destElem = (BigElement)srcElem1 - (BigElement)srcElem2;
    '''
    threeRegLongInst("vsubl", "Vsubl", "SimdAddOp", smallTypes, vsublwCode)
    threeRegWideInst("vsubw", "Vsubw", "SimdAddOp", smallTypes, vsublwCode)

    # Unsigned saturating add. A sum smaller than either operand means the
    # addition wrapped; the result is then forced to all ones and the qc
    # bit of the FPSCR copy is set. The copy is read from and written back
    # to FpscrQc on every element.
    vqaddUCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem < srcElem1 || destElem < srcElem2) {
            destElem = (Element)(-1);
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode)
    threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode)
    # Subtract and narrow to the high half: the BigElement difference is
    # shifted right by the bit width of Element.
    vsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vsubhn", "Vsubhn", "SimdAddOp", smallTypes, vsubhnCode)
    # Rounding variant of vsubhn: 1 << (bit width of Element - 1) is added
    # to the difference before the shift.
    vrsubhnCode = '''
        destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 +
                    ((BigElement)1 << (sizeof(Element) * 8 - 1))) >>
                   (sizeof(Element) * 8);
    '''
    threeRegNarrowInst("vrsubhn", "Vrsubhn", "SimdAddOp", smallTypes, vrsubhnCode)
2159
    # Signed saturating add. Overflow is detected when both operands have
    # the same sign and the wrapped sum has the other one. The result is
    # then replaced by 1 << (bit width - 1), i.e. only the top bit set, and
    # decremented by one when the wrapped sum was negative; qc is set in
    # the FPSCR copy that is written back to FpscrQc.
    vqaddSCode = '''
        destElem = srcElem1 + srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool negSrc2 = (srcElem2 < 0);
        if ((negDest != negSrc1) && (negSrc1 == negSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode)
    threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode)

    # Unsigned saturating subtract. A difference larger than srcElem1 means
    # the subtraction wrapped; the result is then forced to 0 and qc is set.
    vqsubUCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (destElem > srcElem1) {
            destElem = 0;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode)
    threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode)

    # Signed saturating subtract. Same scheme as vqaddSCode, except that
    # the overflow test compares the sign of srcElem1 with the inverted
    # sign of srcElem2 (posSrc2).
    vqsubSCode = '''
        destElem = srcElem1 - srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        bool negDest = (destElem < 0);
        bool negSrc1 = (srcElem1 < 0);
        bool posSrc2 = (srcElem2 >= 0);
        if ((negDest != negSrc1) && (negSrc1 == posSrc2)) {
            destElem = (Element)1 << (sizeof(Element) * 8 - 1);
            if (negDest)
                destElem -= 1;
            fpscr.qc = 1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode)
    threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode)
2205
    # Comparisons write an all-ones element, (Element)(-1), when the
    # condition holds and 0 otherwise.

    # Greater than.
    vcgtCode = '''
        destElem =  (srcElem1 > srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcgt", "VcgtD", "SimdCmpOp", allTypes, 2, vcgtCode)
    threeEqualRegInst("vcgt", "VcgtQ", "SimdCmpOp", allTypes, 4, vcgtCode)

    # Greater than or equal.
    vcgeCode = '''
        destElem =  (srcElem1 >= srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vcge", "VcgeD", "SimdCmpOp", allTypes, 2, vcgeCode)
    threeEqualRegInst("vcge", "VcgeQ", "SimdCmpOp", allTypes, 4, vcgeCode)

    # Equal; registered for unsignedTypes only.
    vceqCode = '''
        destElem =  (srcElem1 == srcElem2) ? (Element)(-1) : 0;
    '''
    threeEqualRegInst("vceq", "VceqD", "SimdCmpOp", unsignedTypes, 2, vceqCode)
    threeEqualRegInst("vceq", "VceqQ", "SimdCmpOp", unsignedTypes, 4, vceqCode)
2223
    # Shift by a per-element amount: the low byte of srcElem2 read as a
    # signed value. Negative amounts shift right, others shift left. An
    # amount of a full element width or more produces 0; for right shifts
    # the fix-up that follows then turns the result into all ones when
    # ltz(srcElem1) holds.
    # NOTE(review): ltz is defined elsewhere; presumably "less than zero"
    # with an unsigned-safe implementation - confirm.
    vshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        }
    '''
    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)

    # Rounding variant of vshlCode. For right shifts rBit is added to the
    # shifted value: it is bit (shiftAmt - 1) of srcElem1 while shiftAmt is
    # at most the element width, and is forced to 1 when shiftAmt exceeds
    # the width and ltz(srcElem1) holds. A zero amount copies srcElem1.
    vrshlCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            Element rBit = 0;
            if (shiftAmt <= sizeof(Element) * 8)
                rBit = bits(srcElem1, shiftAmt - 1);
            if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
                rBit = 1;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (ltz(srcElem1) && !ltz(destElem)) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
            destElem += rBit;
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                destElem = 0;
            } else {
                destElem = srcElem1 << shiftAmt;
            }
        } else {
            destElem = srcElem1;
        }
    '''
    # NOTE(review): vrshl is registered as SimdAluOp while vshl above uses
    # SimdShiftOp - confirm that the difference is intended.
    threeEqualRegInst("vrshl", "VrshlD", "SimdAluOp", allTypes, 2, vrshlCode)
    threeEqualRegInst("vrshl", "VrshlQ", "SimdAluOp", allTypes, 4, vrshlCode)
2283
    # Unsigned saturating shift by the signed low byte of srcElem2.
    # Negative amounts shift right (0 once the amount reaches the element
    # width). Left shifts that would push set bits out of the element - or
    # any left shift of a non-zero value by the full width or more - store
    # mask(element width) instead and set qc in the FPSCR copy written back
    # to FpscrQc. A zero amount copies srcElem1.
    # NOTE(review): mask() and bits() are helpers defined elsewhere;
    # presumably mask(n) yields the low n bits set - confirm.
    vqshlUCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
        } else if (shiftAmt > 0) {
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = 0;
                }
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - shiftAmt)) {
                    destElem = mask(sizeof(Element) * 8);
                    fpscr.qc = 1;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode)
    threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode)

    # Signed saturating shift by the signed low byte of srcElem2. Right
    # shifts are sign extended by hand. A left shift saturates when the
    # bits from the top of the element down to position
    # (width - 1 - shiftAmt) are not all copies of the sign, or when a
    # non-zero value is shifted by the full width or more. On saturation qc
    # is set and the result is mask(width - 1), bit-inverted when srcElem1
    # is negative. A zero amount copies srcElem1.
    vqshlSCode = '''
        int16_t shiftAmt = (int8_t)srcElem2;
        FPSCR fpscr = (FPSCR) FpscrQc;
        if (shiftAmt < 0) {
            shiftAmt = -shiftAmt;
            if (shiftAmt >= sizeof(Element) * 8) {
                shiftAmt = sizeof(Element) * 8 - 1;
                destElem = 0;
            } else {
                destElem = (srcElem1 >> shiftAmt);
            }
            // Make sure the right shift sign extended when it should.
            if (srcElem1 < 0 && destElem >= 0) {
                destElem |= -((Element)1 << (sizeof(Element) * 8 -
                                             1 - shiftAmt));
            }
        } else if (shiftAmt > 0) {
            bool sat = false;
            if (shiftAmt >= sizeof(Element) * 8) {
                if (srcElem1 != 0)
                    sat = true;
                else
                    destElem = 0;
            } else {
                if (bits(srcElem1, sizeof(Element) * 8 - 1,
                            sizeof(Element) * 8 - 1 - shiftAmt) !=
                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
                    sat = true;
                } else {
                    destElem = srcElem1 << shiftAmt;
                }
            }
            if (sat) {
                fpscr.qc = 1;
                destElem = mask(sizeof(Element) * 8 - 1);
                if (srcElem1 < 0)
                    destElem = ~destElem;
            }
        } else {
            destElem = srcElem1;
        }
        FpscrQc = fpscr;
    '''
    # NOTE(review): the signed variant is registered as SimdCmpOp while the
    # unsigned one above uses SimdAluOp - confirm that this is intended.
    threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode)
    threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode)
2365
# vqrshl, register form, registered for unsignedTypes: saturating shift
# with rounding on right shifts.
#   count < 0: rBit is bit (-count - 1) of srcElem1 (taken only while
#              -count <= element width); the source is shifted right by
#              -count (0 once -count reaches the element width) and rBit
#              is added to the result.
#   count >= 0: left shift that saturates to mask(element width) and sets
#              fpscr.qc when set bits would be lost.  There is no separate
#              count == 0 branch here; a zero count goes through this path.
2366    vqrshlUCode = '''
2367        int16_t shiftAmt = (int8_t)srcElem2;
2368        FPSCR fpscr = (FPSCR) FpscrQc;
2369        if (shiftAmt < 0) {
2370            shiftAmt = -shiftAmt;
2371            Element rBit = 0;
2372            if (shiftAmt <= sizeof(Element) * 8)
2373                rBit = bits(srcElem1, shiftAmt - 1);
2374            if (shiftAmt >= sizeof(Element) * 8) {
2375                shiftAmt = sizeof(Element) * 8 - 1;
2376                destElem = 0;
2377            } else {
2378                destElem = (srcElem1 >> shiftAmt);
2379            }
2380            destElem += rBit;
2381        } else {
2382            if (shiftAmt >= sizeof(Element) * 8) {
2383                if (srcElem1 != 0) {
2384                    destElem = mask(sizeof(Element) * 8);
2385                    fpscr.qc = 1;
2386                } else {
2387                    destElem = 0;
2388                }
2389            } else {
2390                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2391                            sizeof(Element) * 8 - shiftAmt)) {
2392                    destElem = mask(sizeof(Element) * 8);
2393                    fpscr.qc = 1;
2394                } else {
2395                    destElem = srcElem1 << shiftAmt;
2396                }
2397            }
2398        }
2399        FpscrQc = fpscr;
2400    '''
2401    threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode)
2402    threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode)
2403
# vqrshl, register form, registered for signedTypes: saturating shift with
# rounding on right shifts.
#   count < 0: rBit is bit (-count - 1) of srcElem1 while
#              -count <= element width, and is forced to 1 when
#              -count > element width and the source is negative.  The
#              source is shifted right (shift clamped to width - 1 when too
#              large), sign bits are filled in by hand if needed, and rBit
#              is added last.
#   count > 0: same saturating left shift as vqshlSCode: on overflow
#              fpscr.qc is set and the result is mask(width - 1),
#              complemented for a negative source.
#   count = 0: srcElem1 is copied unchanged.
2404    vqrshlSCode = '''
2405        int16_t shiftAmt = (int8_t)srcElem2;
2406        FPSCR fpscr = (FPSCR) FpscrQc;
2407        if (shiftAmt < 0) {
2408            shiftAmt = -shiftAmt;
2409            Element rBit = 0;
2410            if (shiftAmt <= sizeof(Element) * 8)
2411                rBit = bits(srcElem1, shiftAmt - 1);
2412            if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
2413                rBit = 1;
2414            if (shiftAmt >= sizeof(Element) * 8) {
2415                shiftAmt = sizeof(Element) * 8 - 1;
2416                destElem = 0;
2417            } else {
2418                destElem = (srcElem1 >> shiftAmt);
2419            }
2420            // Make sure the right shift sign extended when it should.
2421            if (srcElem1 < 0 && destElem >= 0) {
2422                destElem |= -((Element)1 << (sizeof(Element) * 8 -
2423                                             1 - shiftAmt));
2424            }
2425            destElem += rBit;
2426        } else if (shiftAmt > 0) {
2427            bool sat = false;
2428            if (shiftAmt >= sizeof(Element) * 8) {
2429                if (srcElem1 != 0)
2430                    sat = true;
2431                else
2432                    destElem = 0;
2433            } else {
2434                if (bits(srcElem1, sizeof(Element) * 8 - 1,
2435                            sizeof(Element) * 8 - 1 - shiftAmt) !=
2436                        ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) {
2437                    sat = true;
2438                } else {
2439                    destElem = srcElem1 << shiftAmt;
2440                }
2441            }
2442            if (sat) {
2443                fpscr.qc = 1;
2444                destElem = mask(sizeof(Element) * 8 - 1);
2445                if (srcElem1 < 0)
2446                    destElem = ~destElem;
2447            }
2448        } else {
2449            destElem = srcElem1;
2450        }
2451        FpscrQc = fpscr;
2452    '''
2453    threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode)
2454    threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode)
2455
# vaba: adds the absolute difference of srcElem1 and srcElem2 to destElem
# (the larger operand minus the smaller one).
2456    vabaCode = '''
2457        destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2458                                            (srcElem2 - srcElem1);
2459    '''
# The trailing True is passed because the snippet reads destElem before
# writing it.  NOTE(review): presumably the helper's "read destination"
# flag -- confirm against threeEqualRegInst.
2460    threeEqualRegInst("vaba", "VabaD", "SimdAddAccOp", allTypes, 2, vabaCode, True)
2461    threeEqualRegInst("vaba", "VabaQ", "SimdAddAccOp", allTypes, 4, vabaCode, True)
# vabal: same accumulation, but both operands are cast to BigElement before
# the subtraction.
2462    vabalCode = '''
2463        destElem += (srcElem1 > srcElem2) ?
2464            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2465            ((BigElement)srcElem2 - (BigElement)srcElem1);
2466    '''
2467    threeRegLongInst("vabal", "Vabal", "SimdAddAccOp", smallTypes, vabalCode, True)
2468
# vabd: destElem is the absolute difference of srcElem1 and srcElem2
# (the larger operand minus the smaller one).
2469    vabdCode = '''
2470        destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) :
2471                                           (srcElem2 - srcElem1);
2472    '''
2473    threeEqualRegInst("vabd", "VabdD", "SimdAddOp", allTypes, 2, vabdCode)
2474    threeEqualRegInst("vabd", "VabdQ", "SimdAddOp", allTypes, 4, vabdCode)
# vabdl: same difference with both operands cast to BigElement first.
2475    vabdlCode = '''
2476        destElem = (srcElem1 > srcElem2) ?
2477            ((BigElement)srcElem1 - (BigElement)srcElem2) :
2478            ((BigElement)srcElem2 - (BigElement)srcElem1);
2479    '''
2480    threeRegLongInst("vabdl", "Vabdl", "SimdAddOp", smallTypes, vabdlCode)
2481
# vtst: destElem is all ones ((Element)(-1)) when srcElem1 and srcElem2
# have at least one set bit in common, otherwise 0.
2482    vtstCode = '''
2483        destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;
2484    '''
2485    threeEqualRegInst("vtst", "VtstD", "SimdAluOp", unsignedTypes, 2, vtstCode)
2486    threeEqualRegInst("vtst", "VtstQ", "SimdAluOp", unsignedTypes, 4, vtstCode)
2487
# vmul: element-wise product of srcElem1 and srcElem2.
2488    vmulCode = '''
2489        destElem = srcElem1 * srcElem2;
2490    '''
2491    threeEqualRegInst("vmul", "NVmulD", "SimdMultOp", allTypes, 2, vmulCode)
2492    threeEqualRegInst("vmul", "NVmulQ", "SimdMultOp", allTypes, 4, vmulCode)
# vmull: both operands are cast to BigElement before multiplying.
2493    vmullCode = '''
2494        destElem = (BigElement)srcElem1 * (BigElement)srcElem2;
2495    '''
2496    threeRegLongInst("vmull", "Vmull", "SimdMultOp", smallTypes, vmullCode)
2497
# vmla: multiply-accumulate; the product of the two sources is added to the
# current destElem.
2498    vmlaCode = '''
2499        destElem = destElem + srcElem1 * srcElem2;
2500    '''
2501    threeEqualRegInst("vmla", "NVmlaD", "SimdMultAccOp", allTypes, 2, vmlaCode, True)
2502    threeEqualRegInst("vmla", "NVmlaQ", "SimdMultAccOp", allTypes, 4, vmlaCode, True)
# vmlal: same accumulation with both operands cast to BigElement before the
# multiply.
2503    vmlalCode = '''
2504        destElem = destElem + (BigElement)srcElem1 * (BigElement)srcElem2;
2505    '''
2506    threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True)
2507
2508    vqdmlalCode = '''
2509        FPSCR fpscr = (FPSCR) FpscrQc;
2510        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2511        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2512        Element halfNeg = maxNeg / 2;
2513        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2514            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2515            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2516            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2517            fpscr.qc = 1;
2518        }
2519        bool negPreDest = ltz(destElem);
2520        destElem += midElem;
2521        bool negDest = ltz(destElem);
2522        bool negMid = ltz(midElem);
2523        if (negPreDest == negMid && negMid != negDest) {
2524            destElem = mask(sizeof(BigElement) * 8 - 1);
2525            if (negPreDest)
2526                destElem = ~destElem;
2527            fpscr.qc = 1;
2528        }
2529        FpscrQc = fpscr;
2530    '''
2531    threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2532
2533    vqdmlslCode = '''
2534        FPSCR fpscr = (FPSCR) FpscrQc;
2535        BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2536        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2537        Element halfNeg = maxNeg / 2;
2538        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2539            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2540            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2541            midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
2542            fpscr.qc = 1;
2543        }
2544        bool negPreDest = ltz(destElem);
2545        destElem -= midElem;
2546        bool negDest = ltz(destElem);
2547        bool posMid = ltz((BigElement)-midElem);
2548        if (negPreDest == posMid && posMid != negDest) {
2549            destElem = mask(sizeof(BigElement) * 8 - 1);
2550            if (negPreDest)
2551                destElem = ~destElem;
2552            fpscr.qc = 1;
2553        }
2554        FpscrQc = fpscr;
2555    '''
2556    threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2557
# vqdmull: saturating doubling multiply into a wide element.
# destElem = 2 * srcElem1 * srcElem2, computed in 64 bits.  When both
# operands are equal and equal to (Element)1 << (element width - 1), the
# result is replaced by the complement of that value shifted into the upper
# half, and fpscr.qc is set.
2558    vqdmullCode = '''
2559        FPSCR fpscr = (FPSCR) FpscrQc;
2560        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2);
2561        if (srcElem1 == srcElem2 &&
2562                srcElem1 == (Element)((Element)1 <<
2563                    (Element)(sizeof(Element) * 8 - 1))) {
2564            destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8));
2565            fpscr.qc = 1;
2566        }
2567        FpscrQc = fpscr;
2568    '''
# NOTE(review): registered as SimdMultAccOp although the snippet never reads
# destElem, and the twoRegLongInst registration of this same snippet further
# down uses SimdMultOp -- confirm which op class is intended.
2569    threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode)
2570
# vmls: multiply-subtract; the product of the two sources is subtracted
# from the current destElem.
2571    vmlsCode = '''
2572        destElem = destElem - srcElem1 * srcElem2;
2573    '''
2574    threeEqualRegInst("vmls", "NVmlsD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2575    threeEqualRegInst("vmls", "NVmlsQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
# vmlsl: same subtraction with both operands cast to BigElement before the
# multiply.
2576    vmlslCode = '''
2577        destElem = destElem - (BigElement)srcElem1 * (BigElement)srcElem2;
2578    '''
2579    threeRegLongInst("vmlsl", "Vmlsl", "SimdMultAccOp", smallTypes, vmlslCode, True)
2580
# vmul (class names NVmulpD/Q): multiply built from XOR instead of addition.
# For every set bit j of srcElem2, srcElem1 << j is XORed into destElem, so
# no carries propagate between bit positions.
2581    vmulpCode = '''
2582        destElem = 0;
2583        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2584            if (bits(srcElem2, j))
2585                destElem ^= srcElem1 << j;
2586        }
2587    '''
2588    threeEqualRegInst("vmul", "NVmulpD", "SimdMultOp", unsignedTypes, 2, vmulpCode)
2589    threeEqualRegInst("vmul", "NVmulpQ", "SimdMultOp", unsignedTypes, 4, vmulpCode)
# vmull (class name Vmullp): same XOR-accumulating multiply, with srcElem1
# cast to BigElement before shifting so the shifted-up bits are kept.
2590    vmullpCode = '''
2591        destElem = 0;
2592        for (unsigned j = 0; j < sizeof(Element) * 8; j++) {
2593            if (bits(srcElem2, j))
2594                destElem ^= (BigElement)srcElem1 << j;
2595        }
2596    '''
2597    threeRegLongInst("vmull", "Vmullp", "SimdMultOp", smallUnsignedTypes, vmullpCode)
2598
# Integer vpmax / vpmin: reuse vmaxCode and vminCode (defined earlier in the
# file, outside this section) with pairwise=True.  Only a D-suffixed class
# is registered for each.
2599    threeEqualRegInst("vpmax", "VpmaxD", "SimdCmpOp", smallTypes, 2, vmaxCode, pairwise=True)
2600
2601    threeEqualRegInst("vpmin", "VpminD", "SimdCmpOp", smallTypes, 2, vminCode, pairwise=True)
2602
# vqdmulh: saturating doubling multiply returning the high half.
# destElem = (2 * srcElem1 * srcElem2) >> element width, with the product
# computed in 64 bits.  When both operands are equal and equal to
# (Element)1 << (element width - 1), the result is replaced by ~srcElem1
# and fpscr.qc is set.
2603    vqdmulhCode = '''
2604        FPSCR fpscr = (FPSCR) FpscrQc;
2605        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >>
2606                   (sizeof(Element) * 8);
2607        if (srcElem1 == srcElem2 &&
2608                srcElem1 == (Element)((Element)1 <<
2609                    (sizeof(Element) * 8 - 1))) {
2610            destElem = ~srcElem1;
2611            fpscr.qc = 1;
2612        }
2613        FpscrQc = fpscr;
2614    '''
2615    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2616    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2617
2618    vqrdmulhCode = '''
2619        FPSCR fpscr = (FPSCR) FpscrQc;
2620        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
2621                    ((int64_t)1 << (sizeof(Element) * 8 - 1))) >>
2622                   (sizeof(Element) * 8);
2623        Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1);
2624        Element halfNeg = maxNeg / 2;
2625        if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
2626            (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
2627            (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
2628            if (destElem < 0) {
2629                destElem = mask(sizeof(Element) * 8 - 1);
2630            } else {
2631                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2632            }
2633            fpscr.qc = 1;
2634        }
2635        FpscrQc = fpscr;
2636    '''
2637    threeEqualRegInst("vqrdmulh", "VqrdmulhD",
2638            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2639    threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
2640            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2641
# Floating-point vmax.
# processNans() runs first and reports through 'done' whether it already
# produced the result.  If not, the result comes from binaryOp() with
# fpMax<float>.  If it did, and flushToZero(srcReg1, srcReg2) returns true,
# fpscr.idc is set.  fpscr is loaded from FpscrExc and written back to it.
# NOTE(review): the meaning of the "true, true, VfpRoundNearest" arguments
# is defined by binaryOp outside this section -- confirm there.
2642    vmaxfpCode = '''
2643        FPSCR fpscr = (FPSCR) FpscrExc;
2644        bool done;
2645        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2646        if (!done) {
2647            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
2648                               true, true, VfpRoundNearest);
2649        } else if (flushToZero(srcReg1, srcReg2)) {
2650            fpscr.idc = 1;
2651        }
2652        FpscrExc = fpscr;
2653    '''
2654    threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
2655    threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
2656
# Floating-point vmin.
# processNans() runs first and reports through 'done' whether it already
# produced the result.  If not, the result comes from binaryOp() with
# fpMin<float>.  If it did, and flushToZero(srcReg1, srcReg2) returns true,
# fpscr.idc is set.  fpscr is loaded from FpscrExc and written back to it.
2657    vminfpCode = '''
2658        FPSCR fpscr = (FPSCR) FpscrExc;
2659        bool done;
2660        destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
2661        if (!done) {
2662            destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
2663                               true, true, VfpRoundNearest);
2664        } else if (flushToZero(srcReg1, srcReg2)) {
2665            fpscr.idc = 1;
2666        }
2667        FpscrExc = fpscr;
2668    '''
2669    threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
2670    threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
2671
# Floating-point vpmax / vpmin: the vmaxfpCode and vminfpCode snippets are
# registered again with pairwise=True.
2672    threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2673                        2, vmaxfpCode, pairwise=True)
2674    threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
2675                        4, vmaxfpCode, pairwise=True)
2676
2677    threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2678                        2, vminfpCode, pairwise=True)
2679    threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
2680                        4, vminfpCode, pairwise=True)
2681
# Floating-point vadd: destReg = binaryOp(..., fpAddS, ...) on the two
# source registers.  fpscr is loaded from FpscrExc, handed to binaryOp and
# written back afterwards.
2682    vaddfpCode = '''
2683        FPSCR fpscr = (FPSCR) FpscrExc;
2684        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS,
2685                           true, true, VfpRoundNearest);
2686        FpscrExc = fpscr;
2687    '''
2688    threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode)
2689    threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode)
2690
# Floating-point vpadd: the same add snippet registered with pairwise=True.
2691    threeEqualRegInstFp("vpadd", "VpaddDFp", "SimdFloatAddOp", ("float",),
2692                        2, vaddfpCode, pairwise=True)
2693    threeEqualRegInstFp("vpadd", "VpaddQFp", "SimdFloatAddOp", ("float",),
2694                        4, vaddfpCode, pairwise=True)
2695
# Floating-point vsub: destReg = binaryOp(..., fpSubS, ...) on the two
# source registers.  fpscr is loaded from FpscrExc and written back to it.
2696    vsubfpCode = '''
2697        FPSCR fpscr = (FPSCR) FpscrExc;
2698        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2699                           true, true, VfpRoundNearest);
2700        FpscrExc = fpscr;
2701    '''
2702    threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode)
2703    threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode)
2704
# Floating-point vmul: destReg = binaryOp(..., fpMulS, ...) on the two
# source registers.  fpscr is loaded from FpscrExc and written back to it.
2705    vmulfpCode = '''
2706        FPSCR fpscr = (FPSCR) FpscrExc;
2707        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2708                           true, true, VfpRoundNearest);
2709        FpscrExc = fpscr;
2710    '''
2711    threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2712    threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2713
# Floating-point vmla: two separate binaryOp() steps.  The product of the
# sources is computed into 'mid' with fpMulS, then mid and the current
# destReg are combined with fpAddS.  Both steps share the same fpscr, which
# is written back to FpscrExc afterwards.
2714    vmlafpCode = '''
2715        FPSCR fpscr = (FPSCR) FpscrExc;
2716        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2717                             true, true, VfpRoundNearest);
2718        destReg = binaryOp(fpscr, mid, destReg, fpAddS,
2719                           true, true, VfpRoundNearest);
2720        FpscrExc = fpscr;
2721    '''
2722    threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2723    threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2724
# Floating-point vfma: a single ternaryOp() call with fpMulAdd<float> on
# srcReg1, srcReg2 and the current destReg (unlike vmla, which uses two
# binaryOp() steps).  fpscr is loaded from FpscrExc and written back to it.
2725    vfmafpCode = '''
2726        FPSCR fpscr = (FPSCR) FpscrExc;
2727        destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>,
2728                            true, true, VfpRoundNearest);
2729        FpscrExc = fpscr;
2730    '''
2731    threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True)
2732    threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True)
2733
# Floating-point vfms: same ternaryOp() / fpMulAdd<float> call as vfma, but
# the first operand is passed negated (-srcReg1).  fpscr is loaded from
# FpscrExc and written back to it.
2734    vfmsfpCode = '''
2735        FPSCR fpscr = (FPSCR) FpscrExc;
2736        destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>,
2737                            true, true, VfpRoundNearest);
2738        FpscrExc = fpscr;
2739    '''
2740    threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True)
2741    threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True)
2742
# Floating-point vmls: two separate binaryOp() steps.  The product of the
# sources is computed into 'mid' with fpMulS, then fpSubS is applied to
# (destReg, mid).  Both steps share the same fpscr, which is written back
# to FpscrExc afterwards.
2743    vmlsfpCode = '''
2744        FPSCR fpscr = (FPSCR) FpscrExc;
2745        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS,
2746                             true, true, VfpRoundNearest);
2747        destReg = binaryOp(fpscr, destReg, mid, fpSubS,
2748                           true, true, VfpRoundNearest);
2749        FpscrExc = fpscr;
2750    '''
2751    threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2752    threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2753
# Floating-point vcgt: runs vcgtFunc through binaryOp() and turns its float
# result into a mask: destReg is -1 when the result equals 0, otherwise 0.
# A result of 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 return values is defined by
# vcgtFunc outside this section -- confirm there.
2754    vcgtfpCode = '''
2755        FPSCR fpscr = (FPSCR) FpscrExc;
2756        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc,
2757                             true, true, VfpRoundNearest);
2758        destReg = (res == 0) ? -1 : 0;
2759        if (res == 2.0)
2760            fpscr.ioc = 1;
2761        FpscrExc = fpscr;
2762    '''
2763    threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",),
2764            2, vcgtfpCode, toInt = True)
2765    threeEqualRegInstFp("vcgt", "VcgtQFp", "SimdFloatCmpOp", ("float",),
2766            4, vcgtfpCode, toInt = True)
2767
# Floating-point vcge: runs vcgeFunc through binaryOp() and turns its float
# result into a mask: destReg is -1 when the result equals 0, otherwise 0.
# A result of 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 return values is defined by
# vcgeFunc outside this section -- confirm there.
2768    vcgefpCode = '''
2769        FPSCR fpscr = (FPSCR) FpscrExc;
2770        float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc,
2771                             true, true, VfpRoundNearest);
2772        destReg = (res == 0) ? -1 : 0;
2773        if (res == 2.0)
2774            fpscr.ioc = 1;
2775        FpscrExc = fpscr;
2776    '''
2777    threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",),
2778            2, vcgefpCode, toInt = True)
2779    threeEqualRegInstFp("vcge", "VcgeQFp", "SimdFloatCmpOp", ("float",),
2780            4, vcgefpCode, toInt = True)
2781
# Floating-point vacgt: runs vacgtFunc through binaryOp() and turns its
# float result into a mask: destReg is -1 when the result equals 0,
# otherwise 0.  A result of 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 return values is defined by
# vacgtFunc outside this section -- confirm there.
2782    vacgtfpCode = '''
2783        FPSCR fpscr = (FPSCR) FpscrExc;
2784        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc,
2785                             true, true, VfpRoundNearest);
2786        destReg = (res == 0) ? -1 : 0;
2787        if (res == 2.0)
2788            fpscr.ioc = 1;
2789        FpscrExc = fpscr;
2790    '''
2791    threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",),
2792            2, vacgtfpCode, toInt = True)
2793    threeEqualRegInstFp("vacgt", "VacgtQFp", "SimdFloatCmpOp", ("float",),
2794            4, vacgtfpCode, toInt = True)
2795
# Floating-point vacge: runs vacgeFunc through binaryOp() and turns its
# float result into a mask: destReg is -1 when the result equals 0,
# otherwise 0.  A result of 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 return values is defined by
# vacgeFunc outside this section -- confirm there.
2796    vacgefpCode = '''
2797        FPSCR fpscr = (FPSCR) FpscrExc;
2798        float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc,
2799                             true, true, VfpRoundNearest);
2800        destReg = (res == 0) ? -1 : 0;
2801        if (res == 2.0)
2802            fpscr.ioc = 1;
2803        FpscrExc = fpscr;
2804    '''
2805    threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",),
2806            2, vacgefpCode, toInt = True)
2807    threeEqualRegInstFp("vacge", "VacgeQFp", "SimdFloatCmpOp", ("float",),
2808            4, vacgefpCode, toInt = True)
2809
# Floating-point vceq: runs vceqFunc through binaryOp() and turns its float
# result into a mask: destReg is -1 when the result equals 0, otherwise 0.
# A result of 2.0 additionally sets fpscr.ioc.
# NOTE(review): the meaning of the 0 / 2.0 return values is defined by
# vceqFunc outside this section -- confirm there.
2810    vceqfpCode = '''
2811        FPSCR fpscr = (FPSCR) FpscrExc;
2812        float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc,
2813                             true, true, VfpRoundNearest);
2814        destReg = (res == 0) ? -1 : 0;
2815        if (res == 2.0)
2816            fpscr.ioc = 1;
2817        FpscrExc = fpscr;
2818    '''
2819    threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",),
2820            2, vceqfpCode, toInt = True)
2821    threeEqualRegInstFp("vceq", "VceqQFp", "SimdFloatCmpOp", ("float",),
2822            4, vceqfpCode, toInt = True)
2823
# vrecps: destReg = binaryOp(..., fpRecpsS, ...) on the two source
# registers; the arithmetic itself lives in fpRecpsS, outside this section.
# fpscr is loaded from FpscrExc and written back to it.
2824    vrecpsCode = '''
2825        FPSCR fpscr = (FPSCR) FpscrExc;
2826        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS,
2827                           true, true, VfpRoundNearest);
2828        FpscrExc = fpscr;
2829    '''
2830    threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode)
2831    threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode)
2832
# vrsqrts: destReg = binaryOp(..., fpRSqrtsS, ...) on the two source
# registers; the arithmetic itself lives in fpRSqrtsS, outside this section.
# fpscr is loaded from FpscrExc and written back to it.
2833    vrsqrtsCode = '''
2834        FPSCR fpscr = (FPSCR) FpscrExc;
2835        destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS,
2836                           true, true, VfpRoundNearest);
2837        FpscrExc = fpscr;
2838    '''
2839    threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode)
2840    threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode)
2841
# Floating-point vabd: the difference of the sources is computed with
# binaryOp() / fpSubS into 'mid', and destReg is fabs(mid).  fpscr is loaded
# from FpscrExc and written back to it.
2842    vabdfpCode = '''
2843        FPSCR fpscr = (FPSCR) FpscrExc;
2844        float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS,
2845                             true, true, VfpRoundNearest);
2846        destReg = fabs(mid);
2847        FpscrExc = fpscr;
2848    '''
2849    threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode)
2850    threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode)
2851
# vmla through the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst
# helpers, reusing the vmlaCode, vmlafpCode and vmlalCode snippets.
# NOTE(review): presumably the by-scalar encodings (class names carry an
# extra 's') -- confirm against the helper definitions.
# NOTE(review): the integer forms are registered for unsignedTypes here,
# while the vmls and vmul counterparts below use allTypes -- confirm.
2852    twoEqualRegInst("vmla", "VmlasD", "SimdMultAccOp", unsignedTypes, 2, vmlaCode, True)
2853    twoEqualRegInst("vmla", "VmlasQ", "SimdMultAccOp", unsignedTypes, 4, vmlaCode, True)
2854    twoEqualRegInstFp("vmla", "VmlasDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True)
2855    twoEqualRegInstFp("vmla", "VmlasQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True)
2856    twoRegLongInst("vmlal", "Vmlals", "SimdMultAccOp", smallTypes, vmlalCode, True)
2857
# vmls through the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst
# helpers, reusing the vmlsCode, vmlsfpCode and vmlslCode snippets.
# NOTE(review): presumably the by-scalar encodings -- confirm against the
# helper definitions.
2858    twoEqualRegInst("vmls", "VmlssD", "SimdMultAccOp", allTypes, 2, vmlsCode, True)
2859    twoEqualRegInst("vmls", "VmlssQ", "SimdMultAccOp", allTypes, 4, vmlsCode, True)
2860    twoEqualRegInstFp("vmls", "VmlssDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True)
2861    twoEqualRegInstFp("vmls", "VmlssQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True)
2862    twoRegLongInst("vmlsl", "Vmlsls", "SimdMultAccOp", smallTypes, vmlslCode, True)
2863
# vmul through the twoEqualRegInst / twoEqualRegInstFp / twoRegLongInst
# helpers, reusing the vmulCode, vmulfpCode and vmullCode snippets.
# NOTE(review): presumably the by-scalar encodings -- confirm against the
# helper definitions.
2864    twoEqualRegInst("vmul", "VmulsD", "SimdMultOp", allTypes, 2, vmulCode)
2865    twoEqualRegInst("vmul", "VmulsQ", "SimdMultOp", allTypes, 4, vmulCode)
2866    twoEqualRegInstFp("vmul", "VmulsDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode)
2867    twoEqualRegInstFp("vmul", "VmulsQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode)
2868    twoRegLongInst("vmull", "Vmulls", "SimdMultOp", smallTypes, vmullCode)
2869
# Saturating doubling multiplies through the twoRegLongInst /
# twoEqualRegInst helpers, reusing the vqdmullCode, vqdmlalCode,
# vqdmlslCode, vqdmulhCode and vqrdmulhCode snippets.
# NOTE(review): presumably the by-scalar encodings -- confirm against the
# helper definitions.
2870    twoRegLongInst("vqdmull", "Vqdmulls", "SimdMultOp", smallTypes, vqdmullCode)
2871    twoRegLongInst("vqdmlal", "Vqdmlals", "SimdMultAccOp", smallTypes, vqdmlalCode, True)
2872    twoRegLongInst("vqdmlsl", "Vqdmlsls", "SimdMultAccOp", smallTypes, vqdmlslCode, True)
2873    twoEqualRegInst("vqdmulh", "VqdmulhsD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
2874    twoEqualRegInst("vqdmulh", "VqdmulhsQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
2875    twoEqualRegInst("vqrdmulh", "VqrdmulhsD",
2876            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
2877    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
2878            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
2879
# vshr (immediate): right shift of srcElem1 by imm.  When imm is at least
# the element width the shift is not performed; the result is -1 if
# ltz(srcElem1) reports a negative source and 0 otherwise.
2880    vshrCode = '''
2881        if (imm >= sizeof(srcElem1) * 8) {
2882            if (ltz(srcElem1))
2883                destElem = -1;
2884            else
2885                destElem = 0;
2886        } else {
2887            destElem = srcElem1 >> imm;
2888        }
2889    '''
2890    twoRegShiftInst("vshr", "NVshrD", "SimdShiftOp", allTypes, 2, vshrCode)
2891    twoRegShiftInst("vshr", "NVshrQ", "SimdShiftOp", allTypes, 4, vshrCode)
2892
2893    vsraCode = '''
2894        Element mid;;
2895        if (imm >= sizeof(srcElem1) * 8) {
2896            mid = ltz(srcElem1) ? -1 : 0;
2897        } else {
2898            mid = srcElem1 >> imm;
2899            if (ltz(srcElem1) && !ltz(mid)) {
2900                mid |= -(mid & ((Element)1 <<
2901                            (sizeof(Element) * 8 - 1 - imm)));
2902            }
2903        }
2904        destElem += mid;
2905    '''
2906    twoRegShiftInst("vsra", "NVsraD", "SimdShiftAccOp", allTypes, 2, vsraCode, True)
2907    twoRegShiftInst("vsra", "NVsraQ", "SimdShiftAccOp", allTypes, 4, vsraCode, True)
2908
# vrshr (immediate): rounding right shift.
#   imm > element width: result is 0.
#   imm != 0: rBit is bit (imm - 1) of the source; the source is shifted
#             right in two steps ((imm - 1) and then 1, so a shift equal to
#             the element width is never issued as one shift) and rBit is
#             added.
#   imm == 0: srcElem1 is copied unchanged.
2909    vrshrCode = '''
2910        if (imm > sizeof(srcElem1) * 8) {
2911            destElem = 0;
2912        } else if (imm) {
2913            Element rBit = bits(srcElem1, imm - 1);
2914            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2915        } else {
2916            destElem = srcElem1;
2917        }
2918    '''
2919    twoRegShiftInst("vrshr", "NVrshrD", "SimdShiftOp", allTypes, 2, vrshrCode)
2920    twoRegShiftInst("vrshr", "NVrshrQ", "SimdShiftOp", allTypes, 4, vrshrCode)
2921
# vrsra (immediate): rounding right shift and accumulate.  Same three cases
# as vrshrCode, but the value is added to destElem instead of replacing it:
#   imm > element width: adds 0 (destElem unchanged).
#   imm != 0: adds ((srcElem1 >> (imm - 1)) >> 1) plus bit (imm - 1) of the
#             source.
#   imm == 0: adds srcElem1.
2922    vrsraCode = '''
2923        if (imm > sizeof(srcElem1) * 8) {
2924            destElem += 0;
2925        } else if (imm) {
2926            Element rBit = bits(srcElem1, imm - 1);
2927            destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit;
2928        } else {
2929            destElem += srcElem1;
2930        }
2931    '''
2932    twoRegShiftInst("vrsra", "NVrsraD", "SimdShiftAccOp", allTypes, 2, vrsraCode, True)
2933    twoRegShiftInst("vrsra", "NVrsraQ", "SimdShiftAccOp", allTypes, 4, vrsraCode, True)
2934
# vsri (immediate): shift right and insert.
# srcElem1 >> imm is ORed with the bits of the existing destElem selected by
# ~mask(element width - imm), i.e. the top imm bits of destElem are kept.
# When imm is at least the element width destElem is left as it is (the
# self-assignment is an explicit no-op).
2935    vsriCode = '''
2936        if (imm >= sizeof(Element) * 8)
2937            destElem = destElem;
2938        else
2939            destElem = (srcElem1 >> imm) |
2940                (destElem & ~mask(sizeof(Element) * 8 - imm));
2941    '''
2942    twoRegShiftInst("vsri", "NVsriD", "SimdShiftOp", unsignedTypes, 2, vsriCode, True)
2943    twoRegShiftInst("vsri", "NVsriQ", "SimdShiftOp", unsignedTypes, 4, vsriCode, True)
2944
# vshl (immediate): left shift of srcElem1 by imm.  When imm is at least the
# element width the shift is done in two steps (width - 1, then 1) instead
# of one full-width shift.
2945    vshlCode = '''
2946        if (imm >= sizeof(Element) * 8)
2947            destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1;
2948        else
2949            destElem = srcElem1 << imm;
2950    '''
2951    twoRegShiftInst("vshl", "NVshlD", "SimdShiftOp", unsignedTypes, 2, vshlCode)
2952    twoRegShiftInst("vshl", "NVshlQ", "SimdShiftOp", unsignedTypes, 4, vshlCode)
2953
# vsli (immediate): shift left and insert.
# srcElem1 << imm is ORed with the low imm bits of the existing destElem
# (destElem & mask(imm)).  When imm is at least the element width destElem
# is left as it is (the self-assignment is an explicit no-op).
2954    vsliCode = '''
2955        if (imm >= sizeof(Element) * 8)
2956            destElem = destElem;
2957        else
2958            destElem = (srcElem1 << imm) | (destElem & mask(imm));
2959    '''
2960    twoRegShiftInst("vsli", "NVsliD", "SimdShiftOp", unsignedTypes, 2, vsliCode, True)
2961    twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True)
2962
# vqshl (immediate), registered for signedTypes: saturating left shift.
#   imm >= element width: a non-zero source saturates (fpscr.qc set); a
#                         zero source gives 0.
#   imm != 0: the source is shifted left; topBits holds the sign bit plus
#             the imm bits below it.  Unless topBits is all zeros or all
#             ones (mask(imm + 1)), the shift would change the sign or lose
#             value bits, so the result saturates and fpscr.qc is set.
#   imm == 0: srcElem1 is copied unchanged.
# The saturated value is (Element)1 << (width - 1), complemented when the
# source is positive.
2963    vqshlCode = '''
2964        FPSCR fpscr = (FPSCR) FpscrQc;
2965        if (imm >= sizeof(Element) * 8) {
2966            if (srcElem1 != 0) {
2967                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2968                if (srcElem1 > 0)
2969                    destElem = ~destElem;
2970                fpscr.qc = 1;
2971            } else {
2972                destElem = 0;
2973            }
2974        } else if (imm) {
2975            destElem = (srcElem1 << imm);
2976            uint64_t topBits = bits((uint64_t)srcElem1,
2977                                    sizeof(Element) * 8 - 1,
2978                                    sizeof(Element) * 8 - 1 - imm);
2979            if (topBits != 0 && topBits != mask(imm + 1)) {
2980                destElem = (Element)1 << (sizeof(Element) * 8 - 1);
2981                if (srcElem1 > 0)
2982                    destElem = ~destElem;
2983                fpscr.qc = 1;
2984            }
2985        } else {
2986            destElem = srcElem1;
2987        }
2988        FpscrQc = fpscr;
2989    '''
2990    twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode)
2991    twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode)
2992
# vqshlu (immediate), registered for unsignedTypes: saturating left shift.
#   imm >= element width: a non-zero source saturates to
#                         mask(element width) and sets fpscr.qc; a zero
#                         source gives 0.
#   imm != 0: the source is shifted left; topBits holds the top imm bits of
#             the source.  If any of them is set the result saturates to
#             mask(element width) and fpscr.qc is set.
#   imm == 0: srcElem1 is copied unchanged.
2993    vqshluCode = '''
2994        FPSCR fpscr = (FPSCR) FpscrQc;
2995        if (imm >= sizeof(Element) * 8) {
2996            if (srcElem1 != 0) {
2997                destElem = mask(sizeof(Element) * 8);
2998                fpscr.qc = 1;
2999            } else {
3000                destElem = 0;
3001            }
3002        } else if (imm) {
3003            destElem = (srcElem1 << imm);
3004            uint64_t topBits = bits((uint64_t)srcElem1,
3005                                    sizeof(Element) * 8 - 1,
3006                                    sizeof(Element) * 8 - imm);
3007            if (topBits != 0) {
3008                destElem = mask(sizeof(Element) * 8);
3009                fpscr.qc = 1;
3010            }
3011        } else {
3012            destElem = srcElem1;
3013        }
3014        FpscrQc = fpscr;
3015    '''
3016    twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode)
3017    twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode)
3018
# vqshlus (immediate), registered for signedTypes: saturating left shift
# whose result is clamped to the range 0 .. mask(element width).
# In every branch a negative source gives 0 with fpscr.qc set.
#   imm >= element width: a positive source saturates to
#                         mask(element width) with fpscr.qc set; zero
#                         gives 0.
#   imm != 0: the source is shifted left; if any of its top imm bits
#             (topBits) is set, the result saturates to mask(element width)
#             and fpscr.qc is set.
#   imm == 0: a non-negative source is copied unchanged.
3019    vqshlusCode = '''
3020        FPSCR fpscr = (FPSCR) FpscrQc;
3021        if (imm >= sizeof(Element) * 8) {
3022            if (srcElem1 < 0) {
3023                destElem = 0;
3024                fpscr.qc = 1;
3025            } else if (srcElem1 > 0) {
3026                destElem = mask(sizeof(Element) * 8);
3027                fpscr.qc = 1;
3028            } else {
3029                destElem = 0;
3030            }
3031        } else if (imm) {
3032            destElem = (srcElem1 << imm);
3033            uint64_t topBits = bits((uint64_t)srcElem1,
3034                                    sizeof(Element) * 8 - 1,
3035                                    sizeof(Element) * 8 - imm);
3036            if (srcElem1 < 0) {
3037                destElem = 0;
3038                fpscr.qc = 1;
3039            } else if (topBits != 0) {
3040                destElem = mask(sizeof(Element) * 8);
3041                fpscr.qc = 1;
3042            }
3043        } else {
3044            if (srcElem1 < 0) {
3045                fpscr.qc = 1;
3046                destElem = 0;
3047            } else {
3048                destElem = srcElem1;
3049            }
3050        }
3051        FpscrQc = fpscr;
3052    '''
3053    twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode)
3054    twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode)
3055
    # vshrnCode: narrowing right shift by immediate.  A shift amount that is
    # at least the width of the (wide) source element yields 0; otherwise the
    # source is shifted right by `imm` and assigned to the destination element.
3056    vshrnCode = '''
3057        if (imm >= sizeof(srcElem1) * 8) {
3058            destElem = 0;
3059        } else {
3060            destElem = srcElem1 >> imm;
3061        }
3062    '''
3063    twoRegNarrowShiftInst("vshrn", "NVshrn", "SimdShiftOp", smallUnsignedTypes, vshrnCode)
3064
    # vrshrnCode: rounding narrowing right shift by immediate.
    #   * imm > source width -> 0
    #   * imm != 0           -> (srcElem1 >> imm) plus bit (imm - 1) of the
    #                           source as the rounding increment; the shift is
    #                           split into (imm - 1) and 1 so that imm equal to
    #                           the source width never shifts by the full width
    #   * imm == 0           -> source copied unchanged
3065    vrshrnCode = '''
3066        if (imm > sizeof(srcElem1) * 8) {
3067            destElem = 0;
3068        } else if (imm) {
3069            Element rBit = bits(srcElem1, imm - 1);
3070            destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit;
3071        } else {
3072            destElem = srcElem1;
3073        }
3074    '''
3075    twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode)
3076
    # vqshrnCode: saturating narrowing right shift by immediate, signed
    # source to signed result (instantiated for smallSignedTypes).
    #   * imm > source width -> 0; qc set unless the source is 0 or -1
    #   * imm != 0           -> `mid` is the shifted source.  The `mid |= -(...)`
    #                           line isolates the bit at position
    #                           (BigElement width - 1 - imm) and ORs in its
    #                           negation, which sets every bit above it when
    #                           that bit is set (sign re-extension).  If `mid`
    #                           does not survive a round trip through Element,
    #                           the result is clamped to mask(width - 1), bit-
    #                           inverted for a negative source, and qc is set.
    #   * imm == 0           -> source assigned without any range check
    # NOTE(review): the imm == 0 path truncates silently -- confirm that the
    # decoder never routes imm == 0 to this snippet.
3077    vqshrnCode = '''
3078        FPSCR fpscr = (FPSCR) FpscrQc;
3079        if (imm > sizeof(srcElem1) * 8) {
3080            if (srcElem1 != 0 && srcElem1 != -1)
3081                fpscr.qc = 1;
3082            destElem = 0;
3083        } else if (imm) {
3084            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3085            mid |= -(mid & ((BigElement)1 <<
3086                        (sizeof(BigElement) * 8 - 1 - imm)));
3087            if (mid != (Element)mid) {
3088                destElem = mask(sizeof(Element) * 8 - 1);
3089                if (srcElem1 < 0)
3090                    destElem = ~destElem;
3091                fpscr.qc = 1;
3092            } else {
3093                destElem = mid;
3094            }
3095        } else {
3096            destElem = srcElem1;
3097        }
3098        FpscrQc = fpscr;
3099    '''
3100    twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode)
3101
    # vqshrunCode: saturating narrowing right shift by immediate, instantiated
    # for smallUnsignedTypes (class NVqshrun).
    #   * imm > source width -> 0; qc set when the source is non-zero
    #   * imm != 0           -> shifted value `mid`; if it does not survive a
    #                           round trip through Element the result becomes
    #                           mask(width) and qc is set
    #   * imm == 0           -> source assigned without any range check
    # NOTE(review): as in the other narrowing snippets, imm == 0 truncates
    # silently -- confirm it is unreachable.
3102    vqshrunCode = '''
3103        FPSCR fpscr = (FPSCR) FpscrQc;
3104        if (imm > sizeof(srcElem1) * 8) {
3105            if (srcElem1 != 0)
3106                fpscr.qc = 1;
3107            destElem = 0;
3108        } else if (imm) {
3109            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3110            if (mid != (Element)mid) {
3111                destElem = mask(sizeof(Element) * 8);
3112                fpscr.qc = 1;
3113            } else {
3114                destElem = mid;
3115            }
3116        } else {
3117            destElem = srcElem1;
3118        }
3119        FpscrQc = fpscr;
3120    '''
3121    twoRegNarrowShiftInst("vqshrun", "NVqshrun",
3122                          "SimdShiftOp", smallUnsignedTypes, vqshrunCode)
3123
    # vqshrunsCode: saturating narrowing right shift by immediate, signed
    # source clamped to an unsigned range (smallSignedTypes, class NVqshruns).
    #   * imm > source width -> 0; qc set when the source is non-zero
    #   * imm != 0           -> shifted value `mid`; if any bit of `mid` above
    #                           the narrow element width is set, the result is
    #                           0 for a negative source or mask(width)
    #                           otherwise, and qc is set
    #   * imm == 0           -> source assigned without any range check
    # NOTE(review): imm == 0 truncates silently -- confirm it is unreachable.
3124    vqshrunsCode = '''
3125        FPSCR fpscr = (FPSCR) FpscrQc;
3126        if (imm > sizeof(srcElem1) * 8) {
3127            if (srcElem1 != 0)
3128                fpscr.qc = 1;
3129            destElem = 0;
3130        } else if (imm) {
3131            BigElement mid = ((srcElem1 >> (imm - 1)) >> 1);
3132            if (bits(mid, sizeof(BigElement) * 8 - 1,
3133                          sizeof(Element) * 8) != 0) {
3134                if (srcElem1 < 0) {
3135                    destElem = 0;
3136                } else {
3137                    destElem = mask(sizeof(Element) * 8);
3138                }
3139                fpscr.qc = 1;
3140            } else {
3141                destElem = mid;
3142            }
3143        } else {
3144            destElem = srcElem1;
3145        }
3146        FpscrQc = fpscr;
3147    '''
3148    twoRegNarrowShiftInst("vqshrun", "NVqshruns",
3149                          "SimdShiftOp", smallSignedTypes, vqshrunsCode)
3150
    # vqrshrnCode: saturating, rounding, narrowing right shift by immediate,
    # signed source to signed result (smallSignedTypes).
    #   * imm > source width -> 0; qc set unless the source is 0 or -1
    #   * imm != 0           -> shift by (imm - 1), keep the low bit as the
    #                           rounding bit, shift once more, re-extend the
    #                           sign with the `mid |= -(...)` idiom, then add
    #                           the rounding bit.  A value that does not fit
    #                           Element is clamped to mask(width - 1), bit-
    #                           inverted for a negative source, with qc set.
    #   * imm == 0           -> same clamp applied directly to the source
3151    vqrshrnCode = '''
3152        FPSCR fpscr = (FPSCR) FpscrQc;
3153        if (imm > sizeof(srcElem1) * 8) {
3154            if (srcElem1 != 0 && srcElem1 != -1)
3155                fpscr.qc = 1;
3156            destElem = 0;
3157        } else if (imm) {
3158            BigElement mid = (srcElem1 >> (imm - 1));
3159            uint64_t rBit = mid & 0x1;
3160            mid >>= 1;
3161            mid |= -(mid & ((BigElement)1 <<
3162                        (sizeof(BigElement) * 8 - 1 - imm)));
3163            mid += rBit;
3164            if (mid != (Element)mid) {
3165                destElem = mask(sizeof(Element) * 8 - 1);
3166                if (srcElem1 < 0)
3167                    destElem = ~destElem;
3168                fpscr.qc = 1;
3169            } else {
3170                destElem = mid;
3171            }
3172        } else {
3173            if (srcElem1 != (Element)srcElem1) {
3174                destElem = mask(sizeof(Element) * 8 - 1);
3175                if (srcElem1 < 0)
3176                    destElem = ~destElem;
3177                fpscr.qc = 1;
3178            } else {
3179                destElem = srcElem1;
3180            }
3181        }
3182        FpscrQc = fpscr;
3183    '''
3184    twoRegNarrowShiftInst("vqrshrn", "NVqrshrn",
3185                          "SimdShiftOp", smallSignedTypes, vqrshrnCode)
3186
3187    vqrshrunCode = '''
3188        FPSCR fpscr = (FPSCR) FpscrQc;
3189        if (imm > sizeof(srcElem1) * 8) {
3190            if (srcElem1 != 0)
3191                fpscr.qc = 1;
3192            destElem = 0;
3193        } else if (imm) {
3194            BigElement mid = (srcElem1 >> (imm - 1));
3195            uint64_t rBit = mid & 0x1;
3196            mid >>= 1;
3197            mid += rBit;
3198            if (mid != (Element)mid) {
3199                destElem = mask(sizeof(Element) * 8);
3200                fpscr.qc = 1;
3201            } else {
3202                destElem = mid;
3203            }
3204        } else {
3205            if (srcElem1 != (Element)srcElem1) {
3206                destElem = mask(sizeof(Element) * 8 - 1);
3207                fpscr.qc = 1;
3208            } else {
3209                destElem = srcElem1;
3210            }
3211        }
3212        FpscrQc = fpscr;
3213    '''
3214    twoRegNarrowShiftInst("vqrshrun", "NVqrshrun",
3215                          "SimdShiftOp", smallUnsignedTypes, vqrshrunCode)
3216
    # vqrshrunsCode: saturating, rounding, narrowing right shift by immediate,
    # signed source clamped to an unsigned range (smallSignedTypes, class
    # NVqrshruns).
    #   * imm > source width -> 0; qc set when the source is non-zero
    #   * imm != 0           -> shift by (imm - 1), keep the low bit as the
    #                           rounding bit, shift once more, re-extend the
    #                           sign, add the rounding bit.  If any bit above
    #                           the narrow element width is set, the result is
    #                           0 for a negative source or mask(width)
    #                           otherwise, and qc is set.
    #   * imm == 0           -> negative source gives 0 with qc set; any other
    #                           source is assigned without a range check
    # NOTE(review): the imm == 0 path does not clamp positive values that
    # exceed the narrow range -- confirm imm == 0 cannot reach this snippet.
3217    vqrshrunsCode = '''
3218        FPSCR fpscr = (FPSCR) FpscrQc;
3219        if (imm > sizeof(srcElem1) * 8) {
3220            if (srcElem1 != 0)
3221                fpscr.qc = 1;
3222            destElem = 0;
3223        } else if (imm) {
3224            BigElement mid = (srcElem1 >> (imm - 1));
3225            uint64_t rBit = mid & 0x1;
3226            mid >>= 1;
3227            mid |= -(mid & ((BigElement)1 <<
3228                            (sizeof(BigElement) * 8 - 1 - imm)));
3229            mid += rBit;
3230            if (bits(mid, sizeof(BigElement) * 8 - 1,
3231                          sizeof(Element) * 8) != 0) {
3232                if (srcElem1 < 0) {
3233                    destElem = 0;
3234                } else {
3235                    destElem = mask(sizeof(Element) * 8);
3236                }
3237                fpscr.qc = 1;
3238            } else {
3239                destElem = mid;
3240            }
3241        } else {
3242            if (srcElem1 < 0) {
3243                fpscr.qc = 1;
3244                destElem = 0;
3245            } else {
3246                destElem = srcElem1;
3247            }
3248        }
3249        FpscrQc = fpscr;
3250    '''
3251    twoRegNarrowShiftInst("vqrshrun", "NVqrshruns",
3252                          "SimdShiftOp", smallSignedTypes, vqrshrunsCode)
3253
    # vshllCode: widening left shift by immediate.  The source is cast to
    # BigElement before shifting so the shifted-out bits land in the wider
    # destination; a shift of at least the destination width yields 0.
3254    vshllCode = '''
3255        if (imm >= sizeof(destElem) * 8) {
3256            destElem = 0;
3257        } else {
3258            destElem = (BigElement)srcElem1 << imm;
3259        }
3260    '''
3261    twoRegLongShiftInst("vshll", "NVshll", "SimdShiftOp", smallTypes, vshllCode)
3262
    # vmovlCode: plain element copy, emitted through the same long-shift
    # builder as vshll (so the destination element is presumably the wide one).
3263    vmovlCode = '''
3264        destElem = srcElem1;
3265    '''
3266    twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode)
3267
    # Float <-> fixed-point VCVT snippets.  All four follow one pattern:
    # read FpscrExc, call prepFpState(VfpRoundNearest), convert, call
    # finishVfp(), write FpscrExc back.  The empty asm statements carry only
    # memory constraints on the operand / result; presumably they stop the
    # compiler from moving the conversion across the FP-state calls.
    #
    # vcvt2ufxCode: float -> fixed via vfpFpToFixed<float>(src, false, 32, imm).
    # fpscr.idc is set first when flushToZero(srcElem1) reports true.
3268    vcvt2ufxCode = '''
3269        FPSCR fpscr = (FPSCR) FpscrExc;
3270        if (flushToZero(srcElem1))
3271            fpscr.idc = 1;
3272        VfpSavedState state = prepFpState(VfpRoundNearest);
3273        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3274        destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm);
3275        __asm__ __volatile__("" :: "m" (destReg));
3276        finishVfp(fpscr, state, true);
3277        FpscrExc = fpscr;
3278    '''
3279    twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",),
3280            2, vcvt2ufxCode, toInt = True)
3281    twoRegShiftInst("vcvt", "NVcvt2ufxQ", "SimdCvtOp", ("float",),
3282            4, vcvt2ufxCode, toInt = True)
3283
    # vcvt2sfxCode: identical to vcvt2ufxCode except that the second argument
    # of vfpFpToFixed is true (presumably the signed-result flag).
3284    vcvt2sfxCode = '''
3285        FPSCR fpscr = (FPSCR) FpscrExc;
3286        if (flushToZero(srcElem1))
3287            fpscr.idc = 1;
3288        VfpSavedState state = prepFpState(VfpRoundNearest);
3289        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
3290        destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm);
3291        __asm__ __volatile__("" :: "m" (destReg));
3292        finishVfp(fpscr, state, true);
3293        FpscrExc = fpscr;
3294    '''
3295    twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",),
3296            2, vcvt2sfxCode, toInt = True)
3297    twoRegShiftInst("vcvt", "NVcvt2sfxQ", "SimdCvtOp", ("float",),
3298            4, vcvt2sfxCode, toInt = True)
3299
    # vcvtu2fpCode: fixed -> float via vfpUFixedToFpS(true, true, src, 32, imm).
3300    vcvtu2fpCode = '''
3301        FPSCR fpscr = (FPSCR) FpscrExc;
3302        VfpSavedState state = prepFpState(VfpRoundNearest);
3303        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3304        destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm);
3305        __asm__ __volatile__("" :: "m" (destElem));
3306        finishVfp(fpscr, state, true);
3307        FpscrExc = fpscr;
3308    '''
3309    twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",),
3310            2, vcvtu2fpCode, fromInt = True)
3311    twoRegShiftInst("vcvt", "NVcvtu2fpQ", "SimdCvtOp", ("float",),
3312            4, vcvtu2fpCode, fromInt = True)
3313
    # vcvts2fpCode: fixed -> float via vfpSFixedToFpS(true, true, src, 32, imm).
3314    vcvts2fpCode = '''
3315        FPSCR fpscr = (FPSCR) FpscrExc;
3316        VfpSavedState state = prepFpState(VfpRoundNearest);
3317        __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1));
3318        destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm);
3319        __asm__ __volatile__("" :: "m" (destElem));
3320        finishVfp(fpscr, state, true);
3321        FpscrExc = fpscr;
3322    '''
3323    twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",),
3324            2, vcvts2fpCode, fromInt = True)
3325    twoRegShiftInst("vcvt", "NVcvts2fpQ", "SimdCvtOp", ("float",),
3326            4, vcvts2fpCode, fromInt = True)
3327
    # vcvts2hCode: single -> half precision conversion.  The source element
    # bits are reinterpreted as a float with bitsToFp(), fpscr.idc is set when
    # flushToZero() reports true, and vcvtFpSFpH() produces the result using
    # VfpRoundNearest and the current fpscr.ahp setting.
3328    vcvts2hCode = '''
3329        destElem = 0;
3330        FPSCR fpscr = (FPSCR) FpscrExc;
3331        float srcFp1 = bitsToFp(srcElem1, (float)0.0);
3332        if (flushToZero(srcFp1))
3333            fpscr.idc = 1;
3334        VfpSavedState state = prepFpState(VfpRoundNearest);
3335        __asm__ __volatile__("" : "=m" (srcFp1), "=m" (destElem)
3336                                : "m" (srcFp1), "m" (destElem));
3337        destElem = vcvtFpSFpH(fpscr, true, true, VfpRoundNearest,
3338                              fpscr.ahp, srcFp1);
3339        __asm__ __volatile__("" :: "m" (destElem));
3340        finishVfp(fpscr, state, true);
3341        FpscrExc = fpscr;
3342    '''
3343    twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode)
3344
    # vcvth2sCode: half -> single precision conversion.  vcvtFpHFpS() returns
    # a float which is stored into the destination element as raw bits via
    # fpToBits().
3345    vcvth2sCode = '''
3346        destElem = 0;
3347        FPSCR fpscr = (FPSCR) FpscrExc;
3348        VfpSavedState state = prepFpState(VfpRoundNearest);
3349        __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem)
3350                                : "m" (srcElem1), "m" (destElem));
3351        destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1));
3352        __asm__ __volatile__("" :: "m" (destElem));
3353        finishVfp(fpscr, state, true);
3354        FpscrExc = fpscr;
3355    '''
3356    twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
3357
    # vrsqrteCode: integer form, delegates to unsignedRSqrtEstimate().
3358    vrsqrteCode = '''
3359        destElem = unsignedRSqrtEstimate(srcElem1);
3360    '''
3361    twoRegMiscInst("vrsqrte", "NVrsqrteD", "SimdSqrtOp", ("uint32_t",), 2, vrsqrteCode)
3362    twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode)
3363
    # vrsqrtefpCode: float form, delegates to fprSqrtEstimate(); fpscr.idc is
    # set beforehand when flushToZero(srcReg1) reports true.
3364    vrsqrtefpCode = '''
3365        FPSCR fpscr = (FPSCR) FpscrExc;
3366        if (flushToZero(srcReg1))
3367            fpscr.idc = 1;
3368        destReg = fprSqrtEstimate(fpscr, srcReg1);
3369        FpscrExc = fpscr;
3370    '''
3371    twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode)
3372    twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode)
3373
    # vrecpeCode: integer form, delegates to unsignedRecipEstimate().
3374    vrecpeCode = '''
3375        destElem = unsignedRecipEstimate(srcElem1);
3376    '''
3377    twoRegMiscInst("vrecpe", "NVrecpeD", "SimdMultAccOp", ("uint32_t",), 2, vrecpeCode)
3378    twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode)
3379
    # vrecpefpCode: float form, delegates to fpRecipEstimate(); fpscr.idc is
    # set beforehand when flushToZero(srcReg1) reports true.
3380    vrecpefpCode = '''
3381        FPSCR fpscr = (FPSCR) FpscrExc;
3382        if (flushToZero(srcReg1))
3383            fpscr.idc = 1;
3384        destReg = fpRecipEstimate(fpscr, srcReg1);
3385        FpscrExc = fpscr;
3386    '''
3387    twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode)
3388    twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode)
3389
    # vrev16 / vrev32 / vrev64: the three snippets differ only in the group
    # size in bytes (1 << 1, 1 << 2, 1 << 3).  groupSize is the number of
    # elements per group, and XOR-ing the element index `i` with
    # (groupSize - 1) mirrors its position inside the group.  The mirrored
    # index is stored in `j`, which is presumably the destination index used
    # by the twoRegMiscInst template -- confirm there.
3390    vrev16Code = '''
3391        destElem = srcElem1;
3392        unsigned groupSize = ((1 << 1) / sizeof(Element));
3393        unsigned reverseMask = (groupSize - 1);
3394        j = i ^ reverseMask;
3395    '''
3396    twoRegMiscInst("vrev16", "NVrev16D", "SimdAluOp", ("uint8_t",), 2, vrev16Code)
3397    twoRegMiscInst("vrev16", "NVrev16Q", "SimdAluOp", ("uint8_t",), 4, vrev16Code)
3398    vrev32Code = '''
3399        destElem = srcElem1;
3400        unsigned groupSize = ((1 << 2) / sizeof(Element));
3401        unsigned reverseMask = (groupSize - 1);
3402        j = i ^ reverseMask;
3403    '''
3404    twoRegMiscInst("vrev32", "NVrev32D",
3405            "SimdAluOp", ("uint8_t", "uint16_t"), 2, vrev32Code)
3406    twoRegMiscInst("vrev32", "NVrev32Q",
3407            "SimdAluOp", ("uint8_t", "uint16_t"), 4, vrev32Code)
3408    vrev64Code = '''
3409        destElem = srcElem1;
3410        unsigned groupSize = ((1 << 3) / sizeof(Element));
3411        unsigned reverseMask = (groupSize - 1);
3412        j = i ^ reverseMask;
3413    '''
3414    twoRegMiscInst("vrev64", "NVrev64D", "SimdAluOp", smallUnsignedTypes, 2, vrev64Code)
3415    twoRegMiscInst("vrev64", "NVrev64Q", "SimdAluOp", smallUnsignedTypes, 4, vrev64Code)
3416
    # vpaddlCode: sum of two source elements, each widened to BigElement
    # before the addition so the sum is computed at the wider width.
3417    vpaddlCode = '''
3418        destElem = (BigElement)srcElem1 + (BigElement)srcElem2;
3419    '''
3420    twoRegCondenseInst("vpaddl", "NVpaddlD", "SimdAddOp", smallTypes, 2, vpaddlCode)
3421    twoRegCondenseInst("vpaddl", "NVpaddlQ", "SimdAddOp", smallTypes, 4, vpaddlCode)
3422
    # vpadalCode: same widened sum, accumulated into the existing destination
    # element.  The trailing True argument presumably tells the builder to
    # read the destination first -- confirm in twoRegCondenseInst.
3423    vpadalCode = '''
3424        destElem += (BigElement)srcElem1 + (BigElement)srcElem2;
3425    '''
3426    twoRegCondenseInst("vpadal", "NVpadalD", "SimdAddAccOp", smallTypes, 2, vpadalCode, True)
3427    twoRegCondenseInst("vpadal", "NVpadalQ", "SimdAddAccOp", smallTypes, 4, vpadalCode, True)
3428
    # vclsCode: counts how many bits directly below the top bit are equal to
    # it.  The source is shifted left once to drop the top bit, then shifted
    # repeatedly while the new top bit still matches the original sign; the
    # count is capped at (element width - 1).
3429    vclsCode = '''
3430        unsigned count = 0;
3431        if (srcElem1 < 0) {
3432            srcElem1 <<= 1;
3433            while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) {
3434                count++;
3435                srcElem1 <<= 1;
3436            }
3437        } else {
3438            srcElem1 <<= 1;
3439            while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) {
3440                count++;
3441                srcElem1 <<= 1;
3442            }
3443        }
3444        destElem = count;
3445    '''
3446    twoRegMiscInst("vcls", "NVclsD", "SimdAluOp", signedTypes, 2, vclsCode)
3447    twoRegMiscInst("vcls", "NVclsQ", "SimdAluOp", signedTypes, 4, vclsCode)
3448
    # vclzCode: counts leading zero bits.  It is instantiated with
    # signedTypes so that `srcElem1 >= 0` tests the top bit; the loop shifts
    # left until the top bit is set or the full element width was counted.
3449    vclzCode = '''
3450        unsigned count = 0;
3451        while (srcElem1 >= 0 && count < sizeof(Element) * 8) {
3452            count++;
3453            srcElem1 <<= 1;
3454        }
3455        destElem = count;
3456    '''
3457    twoRegMiscInst("vclz", "NVclzD", "SimdAluOp", signedTypes, 2, vclzCode)
3458    twoRegMiscInst("vclz", "NVclzQ", "SimdAluOp", signedTypes, 4, vclzCode)
3459
    # vcntCode: population count.  Adds the low bit to the count and shifts
    # right until the value is zero (or the element width has been reached).
3460    vcntCode = '''
3461        unsigned count = 0;
3462        while (srcElem1 && count < sizeof(Element) * 8) {
3463            count += srcElem1 & 0x1;
3464            srcElem1 >>= 1;
3465        }
3466        destElem = count;
3467    '''
3468
3469    twoRegMiscInst("vcnt", "NVcntD", "SimdAluOp", unsignedTypes, 2, vcntCode)
3470    twoRegMiscInst("vcnt", "NVcntQ", "SimdAluOp", unsignedTypes, 4, vcntCode)
3471
    # vmvnCode (register form): bitwise NOT, processed as uint64_t elements.
3472    vmvnCode = '''
3473        destElem = ~srcElem1;
3474    '''
3475    twoRegMiscInst("vmvn", "NVmvnD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3476    twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3477
    # vqabsCode: saturating absolute value.  A source with only the top bit
    # set (the most negative value) cannot be negated, so the result is its
    # bitwise complement (the largest positive value) and fpscr.qc is set.
    # Other negative values are negated; non-negative values are copied.
3478    vqabsCode = '''
3479        FPSCR fpscr = (FPSCR) FpscrQc;
3480        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3481            fpscr.qc = 1;
3482            destElem = ~srcElem1;
3483        } else if (srcElem1 < 0) {
3484            destElem = -srcElem1;
3485        } else {
3486            destElem = srcElem1;
3487        }
3488        FpscrQc = fpscr;
3489    '''
3490    twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode)
3491    twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode)
3492
    # vqnegCode: saturating negation.  Same special case as vqabsCode for the
    # most negative value; every other value is simply negated.
3493    vqnegCode = '''
3494        FPSCR fpscr = (FPSCR) FpscrQc;
3495        if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) {
3496            fpscr.qc = 1;
3497            destElem = ~srcElem1;
3498        } else {
3499            destElem = -srcElem1;
3500        }
3501        FpscrQc = fpscr;
3502    '''
3503    twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode)
3504    twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode)
3505
    # vabsCode: integer absolute value without saturation (negative values
    # are negated, everything else is copied).
3506    vabsCode = '''
3507        if (srcElem1 < 0) {
3508            destElem = -srcElem1;
3509        } else {
3510            destElem = srcElem1;
3511        }
3512    '''
3513
3514    twoRegMiscInst("vabs", "NVabsD", "SimdAluOp", signedTypes, 2, vabsCode)
3515    twoRegMiscInst("vabs", "NVabsQ", "SimdAluOp", signedTypes, 4, vabsCode)
    # vabsfpCode: float absolute value done on the raw bits.  The value is
    # viewed through a uint32_t / float union and the top bit is cleared with
    # mask(sizeof(Element) * 8 - 1).
3516    vabsfpCode = '''
3517        union
3518        {
3519            uint32_t i;
3520            float f;
3521        } cStruct;
3522        cStruct.f = srcReg1;
3523        cStruct.i &= mask(sizeof(Element) * 8 - 1);
3524        destReg = cStruct.f;
3525    '''
3526    twoRegMiscInstFp("vabs", "NVabsDFp", "SimdFloatAluOp", ("float",), 2, vabsfpCode)
3527    twoRegMiscInstFp("vabs", "NVabsQFp", "SimdFloatAluOp", ("float",), 4, vabsfpCode)
3528
    # vnegCode / vnegfpCode: plain arithmetic negation for the integer and the
    # float form respectively.
3529    vnegCode = '''
3530        destElem = -srcElem1;
3531    '''
3532    twoRegMiscInst("vneg", "NVnegD", "SimdAluOp", signedTypes, 2, vnegCode)
3533    twoRegMiscInst("vneg", "NVnegQ", "SimdAluOp", signedTypes, 4, vnegCode)
3534    vnegfpCode = '''
3535        destReg = -srcReg1;
3536    '''
3537    twoRegMiscInstFp("vneg", "NVnegDFp", "SimdFloatAluOp", ("float",), 2, vnegfpCode)
3538    twoRegMiscInstFp("vneg", "NVnegQFp", "SimdFloatAluOp", ("float",), 4, vnegfpCode)
3539
    # Compare-against-zero snippets (vcgt, vcge, vceq, vcle, vclt).
    #
    # Integer forms: the destination element becomes mask(element width) when
    # the comparison with 0 holds and 0 otherwise.
    #
    # Float forms: the comparison is delegated to binaryOp() with the matching
    # vc*Func helper and a 0.0 second operand.  The returned float is decoded
    # here as: 0 -> destReg = -1 (condition holds), anything else -> 0, and
    # 2.0 additionally sets fpscr.ioc.  The meaning of those codes is defined
    # by the vc*Func helpers, which are not part of this section.
3540    vcgtCode = 'destElem = (srcElem1 > 0) ? mask(sizeof(Element) * 8) : 0;'
3541    twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode)
3542    twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode)
3543    vcgtfpCode = '''
3544        FPSCR fpscr = (FPSCR) FpscrExc;
3545        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc,
3546                             true, true, VfpRoundNearest);
3547        destReg = (res == 0) ? -1 : 0;
3548        if (res == 2.0)
3549            fpscr.ioc = 1;
3550        FpscrExc = fpscr;
3551    '''
3552    twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",),
3553            2, vcgtfpCode, toInt = True)
3554    twoRegMiscInstFp("vcgt", "NVcgtQFp", "SimdFloatCmpOp", ("float",),
3555            4, vcgtfpCode, toInt = True)
3556
3557    vcgeCode = 'destElem = (srcElem1 >= 0) ? mask(sizeof(Element) * 8) : 0;'
3558    twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode)
3559    twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode)
3560    vcgefpCode = '''
3561        FPSCR fpscr = (FPSCR) FpscrExc;
3562        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc,
3563                             true, true, VfpRoundNearest);
3564        destReg = (res == 0) ? -1 : 0;
3565        if (res == 2.0)
3566            fpscr.ioc = 1;
3567        FpscrExc = fpscr;
3568    '''
3569    twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",),
3570            2, vcgefpCode, toInt = True)
3571    twoRegMiscInstFp("vcge", "NVcgeQFp", "SimdFloatCmpOp", ("float",),
3572            4, vcgefpCode, toInt = True)
3573
3574    vceqCode = 'destElem = (srcElem1 == 0) ? mask(sizeof(Element) * 8) : 0;'
3575    twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode)
3576    twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode)
3577    vceqfpCode = '''
3578        FPSCR fpscr = (FPSCR) FpscrExc;
3579        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc,
3580                             true, true, VfpRoundNearest);
3581        destReg = (res == 0) ? -1 : 0;
3582        if (res == 2.0)
3583            fpscr.ioc = 1;
3584        FpscrExc = fpscr;
3585    '''
3586    twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",),
3587            2, vceqfpCode, toInt = True)
3588    twoRegMiscInstFp("vceq", "NVceqQFp", "SimdFloatCmpOp", ("float",),
3589            4, vceqfpCode, toInt = True)
3590
3591    vcleCode = 'destElem = (srcElem1 <= 0) ? mask(sizeof(Element) * 8) : 0;'
3592    twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode)
3593    twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode)
3594    vclefpCode = '''
3595        FPSCR fpscr = (FPSCR) FpscrExc;
3596        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc,
3597                             true, true, VfpRoundNearest);
3598        destReg = (res == 0) ? -1 : 0;
3599        if (res == 2.0)
3600            fpscr.ioc = 1;
3601        FpscrExc = fpscr;
3602    '''
3603    twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",),
3604            2, vclefpCode, toInt = True)
3605    twoRegMiscInstFp("vcle", "NVcleQFp", "SimdFloatCmpOp", ("float",),
3606            4, vclefpCode, toInt = True)
3607
3608    vcltCode = 'destElem = (srcElem1 < 0) ? mask(sizeof(Element) * 8) : 0;'
3609    twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode)
3610    twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode)
3611    vcltfpCode = '''
3612        FPSCR fpscr = (FPSCR) FpscrExc;
3613        float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc,
3614                             true, true, VfpRoundNearest);
3615        destReg = (res == 0) ? -1 : 0;
3616        if (res == 2.0)
3617            fpscr.ioc = 1;
3618        FpscrExc = fpscr;
3619    '''
3620    twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",),
3621            2, vcltfpCode, toInt = True)
3622    twoRegMiscInstFp("vclt", "NVcltQFp", "SimdFloatCmpOp", ("float",),
3623            4, vcltfpCode, toInt = True)
3624
    # vswpCode: exchanges srcReg1 and destReg one register word at a time
    # (regs[0 .. rCount - 1]); both operands are modified.
3625    vswpCode = '''
3626        FloatRegBits mid;
3627        for (unsigned r = 0; r < rCount; r++) {
3628            mid = srcReg1.regs[r];
3629            srcReg1.regs[r] = destReg.regs[r];
3630            destReg.regs[r] = mid;
3631        }
3632    '''
3633    twoRegMiscScramble("vswp", "NVswpD", "SimdAluOp", ("uint64_t",), 2, vswpCode)
3634    twoRegMiscScramble("vswp", "NVswpQ", "SimdAluOp", ("uint64_t",), 4, vswpCode)
3635
    # vtrnCode: for every even index i, exchanges srcReg1.elements[i] with
    # destReg.elements[i + 1]; all other elements are left untouched.
3636    vtrnCode = '''
3637        Element mid;
3638        for (unsigned i = 0; i < eCount; i += 2) {
3639            mid = srcReg1.elements[i];
3640            srcReg1.elements[i] = destReg.elements[i + 1];
3641            destReg.elements[i + 1] = mid;
3642        }
3643    '''
3644    twoRegMiscScramble("vtrn", "NVtrnD", "SimdAluOp",
3645            smallUnsignedTypes, 2, vtrnCode)
3646    twoRegMiscScramble("vtrn", "NVtrnQ", "SimdAluOp",
3647            smallUnsignedTypes, 4, vtrnCode)
3648
    # vuzpCode: de-interleave.  After the snippet destReg holds the even-
    # indexed elements (its own first, then srcReg1's) and srcReg1 holds the
    # odd-indexed ones in the same order.  srcReg1 is snapshotted into `mid`
    # because it is overwritten while its old contents are still needed.
    # destReg is compacted in place in the first loop: slot i is written only
    # after slots 2 * i and 2 * i + 1 have been read.  Its upper half is
    # filled in a second loop once the first has finished reading destReg.
3649    vuzpCode = '''
3650        Element mid[eCount];
3651        memcpy(&mid, &srcReg1, sizeof(srcReg1));
3652        for (unsigned i = 0; i < eCount / 2; i++) {
3653            srcReg1.elements[i] = destReg.elements[2 * i + 1];
3654            srcReg1.elements[eCount / 2 + i] = mid[2 * i + 1];
3655            destReg.elements[i] = destReg.elements[2 * i];
3656        }
3657        for (unsigned i = 0; i < eCount / 2; i++) {
3658            destReg.elements[eCount / 2 + i] = mid[2 * i];
3659        }
3660    '''
3661    twoRegMiscScramble("vuzp", "NVuzpD", "SimdAluOp", unsignedTypes, 2, vuzpCode)
3662    twoRegMiscScramble("vuzp", "NVuzpQ", "SimdAluOp", unsignedTypes, 4, vuzpCode)
3663
3664    vzipCode = '''
3665        Element mid[eCount];
3666        memcpy(&mid, &destReg, sizeof(destReg));
3667        for (unsigned i = 0; i < eCount / 2; i++) {
3668            destReg.elements[2 * i] = mid[i];
3669            destReg.elements[2 * i + 1] = srcReg1.elements[i];
3670        }
3671        for (int i = 0; i < eCount / 2; i++) {
3672            srcReg1.elements[2 * i] = mid[eCount / 2 + i];
3673            srcReg1.elements[2 * i + 1] = srcReg1.elements[eCount / 2 + i];
3674        }
3675    '''
3676    twoRegMiscScramble("vzip", "NVzipD", "SimdAluOp", unsignedTypes, 2, vzipCode)
3677    twoRegMiscScramble("vzip", "NVzipQ", "SimdAluOp", unsignedTypes, 4, vzipCode)
3678
    # vmovnCode: plain element assignment emitted through the narrowing
    # builder, so any narrowing comes from the destination element type.
3679    vmovnCode = 'destElem = srcElem1;'
3680    twoRegNarrowMiscInst("vmovn", "NVmovn", "SimdMiscOp", smallUnsignedTypes, vmovnCode)
3681
    # vdupCode: plain element assignment; which source element is replicated
    # is presumably decided by twoRegMiscScInst -- confirm there.
3682    vdupCode = 'destElem = srcElem1;'
3683    twoRegMiscScInst("vdup", "NVdupD", "SimdAluOp", smallUnsignedTypes, 2, vdupCode)
3684    twoRegMiscScInst("vdup", "NVdupQ", "SimdAluOp", smallUnsignedTypes, 4, vdupCode)
3685
3686    def vdupGprInst(name, Name, opClass, types, rCount):
3687        global header_output, exec_output
3688        eWalkCode = '''
3689        RegVect destReg;
3690        for (unsigned i = 0; i < eCount; i++) {
3691            destReg.elements[i] = htog((Element)Op1);
3692        }
3693        '''
3694        for reg in range(rCount):
3695            eWalkCode += '''
3696            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3697            ''' % { "reg" : reg }
3698        iop = InstObjParams(name, Name,
3699                            "RegRegOp",
3700                            { "code": eWalkCode,
3701                              "r_count": rCount,
3702                              "predicate_test": predicateTest,
3703                              "op_class": opClass }, [])
3704        header_output += NeonRegRegOpDeclare.subst(iop)
3705        exec_output += NeonEqualRegExecute.subst(iop)
3706        for type in types:
3707            substDict = { "targs" : type,
3708                          "class_name" : Name }
3709            exec_output += NeonExecDeclare.subst(substDict)
3710    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
3711    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
3712
    # One-register immediate forms, all processed as uint64_t elements.
    # vmov assigns the immediate and vmvn assigns its complement.  vorr and
    # vbic combine the immediate with the current destination value; their
    # trailing True argument presumably makes the builder read the
    # destination first -- confirm in oneRegImmInst.
3713    vmovCode = 'destElem = imm;'
3714    oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
3715    oneRegImmInst("vmov", "NVmoviQ", "SimdMiscOp", ("uint64_t",), 4, vmovCode)
3716
3717    vorrCode = 'destElem |= imm;'
3718    oneRegImmInst("vorr", "NVorriD", "SimdAluOp", ("uint64_t",), 2, vorrCode, True)
3719    oneRegImmInst("vorr", "NVorriQ", "SimdAluOp", ("uint64_t",), 4, vorrCode, True)
3720
    # Note: this re-binds the Python name vmvnCode that was used earlier for
    # the register form; the earlier instantiations already consumed it.
3721    vmvnCode = 'destElem = ~imm;'
3722    oneRegImmInst("vmvn", "NVmvniD", "SimdAluOp", ("uint64_t",), 2, vmvnCode)
3723    oneRegImmInst("vmvn", "NVmvniQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode)
3724
3725    vbicCode = 'destElem &= ~imm;'
3726    oneRegImmInst("vbic", "NVbiciD", "SimdAluOp", ("uint64_t",), 2, vbicCode, True)
3727    oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True)
3728
    # vqmovnCode: saturating narrow, signed to signed.  The source is first
    # assigned to the narrow destination; if widening it back does not
    # reproduce the source, the result is clamped to mask(width - 1), bit-
    # inverted for a negative source, and fpscr.qc is set.
3729    vqmovnCode = '''
3730    FPSCR fpscr = (FPSCR) FpscrQc;
3731    destElem = srcElem1;
3732    if ((BigElement)destElem != srcElem1) {
3733        fpscr.qc = 1;
3734        destElem = mask(sizeof(Element) * 8 - 1);
3735        if (srcElem1 < 0)
3736            destElem = ~destElem;
3737    }
3738    FpscrQc = fpscr;
3739    '''
3740    twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode)
3741
    # vqmovunCode: saturating narrow for unsigned element types.  A value
    # that does not survive the round trip is clamped to mask(width) with
    # fpscr.qc set.
3742    vqmovunCode = '''
3743    FPSCR fpscr = (FPSCR) FpscrQc;
3744    destElem = srcElem1;
3745    if ((BigElement)destElem != srcElem1) {
3746        fpscr.qc = 1;
3747        destElem = mask(sizeof(Element) * 8);
3748    }
3749    FpscrQc = fpscr;
3750    '''
3751    twoRegNarrowMiscInst("vqmovun", "NVqmovun",
3752            "SimdMiscOp", smallUnsignedTypes, vqmovunCode)
3753
    # vqmovunsCode: saturating narrow, signed source clamped to an unsigned
    # range.  A negative source, or one whose value differs from its own low
    # `width` bits, sets fpscr.qc; the result is then mask(width), bit-
    # inverted for a negative source.
3754    vqmovunsCode = '''
3755    FPSCR fpscr = (FPSCR) FpscrQc;
3756    destElem = srcElem1;
3757    if (srcElem1 < 0 ||
3758            ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
3759        fpscr.qc = 1;
3760        destElem = mask(sizeof(Element) * 8);
3761        if (srcElem1 < 0)
3762            destElem = ~destElem;
3763    }
3764    FpscrQc = fpscr;
3765    '''
3766    twoRegNarrowMiscInst("vqmovun", "NVqmovuns",
3767            "SimdMiscOp", smallSignedTypes, vqmovunsCode)
3768
3769    def buildVext(name, Name, opClass, types, rCount, op):
3770        global header_output, exec_output
3771        eWalkCode = '''
3772        RegVect srcReg1, srcReg2, destReg;
3773        '''
3774        for reg in range(rCount):
3775            eWalkCode += simdEnabledCheckCode + '''
3776                srcReg1.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);
3777                srcReg2.regs[%(reg)d] = htog(FpOp2P%(reg)d_uw);
3778            ''' % { "reg" : reg }
3779        eWalkCode += op
3780        for reg in range(rCount):
3781            eWalkCode += '''
3782            FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
3783            ''' % { "reg" : reg }
3784        iop = InstObjParams(name, Name,
3785                            "RegRegRegImmOp",
3786                            { "code": eWalkCode,
3787                              "r_count": rCount,
3788                              "predicate_test": predicateTest,
3789                              "op_class": opClass }, [])
3790        header_output += NeonRegRegRegImmOpDeclare.subst(iop)
3791        exec_output += NeonEqualRegExecute.subst(iop)
3792        for type in types:
3793            substDict = { "targs" : type,
3794                          "class_name" : Name }
3795            exec_output += NeonExecDeclare.subst(substDict)
3796
    # vextCode: extracts eCount consecutive elements, starting at element
    # `imm`, from the concatenation of srcReg1 (low) and srcReg2 (high).
    # Indices below eCount come from srcReg1; indices in [eCount, 2 * eCount)
    # come from srcReg2.  An index beyond that raises UndefinedInstruction.
    # NOTE(review): the loop keeps running after a fault is assigned, so a
    # new fault object is created for every remaining element -- harmless if
    # `fault` is a reference-counted handle, but confirm.
3797    vextCode = '''
3798        for (unsigned i = 0; i < eCount; i++) {
3799            unsigned index = i + imm;
3800            if (index < eCount) {
3801                destReg.elements[i] = srcReg1.elements[index];
3802            } else {
3803                index -= eCount;
3804                if (index >= eCount) {
3805                    fault = new UndefinedInstruction(machInst, false, mnemonic);
3806                } else {
3807                    destReg.elements[i] = srcReg2.elements[index];
3808                }
3809            }
3810        }
3811    '''
3812    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
3813    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
3814
3815    def buildVtbxl(name, Name, opClass, length, isVtbl):
3816        global header_output, decoder_output, exec_output
3817        code = '''
3818            union
3819            {
3820                uint8_t bytes[32];
3821                FloatRegBits regs[8];
3822            } table;
3823
3824            union
3825            {
3826                uint8_t bytes[8];
3827                FloatRegBits regs[2];
3828            } destReg, srcReg2;
3829
3830            const unsigned length = %(length)d;
3831            const bool isVtbl = %(isVtbl)s;
3832
3833            srcReg2.regs[0] = htog(FpOp2P0_uw);
3834            srcReg2.regs[1] = htog(FpOp2P1_uw);
3835
3836            destReg.regs[0] = htog(FpDestP0_uw);
3837            destReg.regs[1] = htog(FpDestP1_uw);
3838        ''' % { "length" : length, "isVtbl" : isVtbl }
3839        for reg in range(8):
3840            if reg < length * 2:
3841                code += 'table.regs[%(reg)d] = htog(FpOp1P%(reg)d_uw);\n' % \
3842                        { "reg" : reg }
3843            else:
3844                code += 'table.regs[%(reg)d] = 0;\n' % { "reg" : reg }
3845        code += '''
3846        for (unsigned i = 0; i < sizeof(destReg); i++) {
3847            uint8_t index = srcReg2.bytes[i];
3848            if (index < 8 * length) {
3849                destReg.bytes[i] = table.bytes[index];
3850            } else {
3851                if (isVtbl)
3852                    destReg.bytes[i] = 0;
3853                // else destReg.bytes[i] unchanged
3854            }
3855        }
3856
3857        FpDestP0_uw = gtoh(destReg.regs[0]);
3858        FpDestP1_uw = gtoh(destReg.regs[1]);
3859        '''
3860        iop = InstObjParams(name, Name,
3861                            "RegRegRegOp",
3862                            { "code": code,
3863                              "predicate_test": predicateTest,
3864                              "op_class": opClass }, [])
3865        header_output += RegRegRegOpDeclare.subst(iop)
3866        decoder_output += RegRegRegOpConstructor.subst(iop)
3867        exec_output += PredOpExecute.subst(iop)
3868
3869    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
3870    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
3871    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
3872    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
3873
3874    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
3875    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
3876    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
3877    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
3878}};
3879